From fc13ed68c23976d7f92dd0c64dc080b7371852e6 Mon Sep 17 00:00:00 2001
From: pmannil <45764461+pmannil@users.noreply.github.com>
Date: Tue, 23 Jul 2019 22:34:30 +0530
Subject: [PATCH 1/2] Created using Colaboratory
---
module2.ipynb | 1637 +++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 1637 insertions(+)
create mode 100644 module2.ipynb
diff --git a/module2.ipynb b/module2.ipynb
new file mode 100644
index 00000000..573e3159
--- /dev/null
+++ b/module2.ipynb
@@ -0,0 +1,1637 @@
+{
+ "nbformat": 4,
+ "nbformat_minor": 0,
+ "metadata": {
+ "colab": {
+ "name": "module2.ipynb",
+ "version": "0.3.2",
+ "provenance": [],
+ "collapsed_sections": [],
+ "include_colab_link": true
+ },
+ "kernelspec": {
+ "name": "python3",
+ "display_name": "Python 3"
+ }
+ },
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "view-in-github",
+ "colab_type": "text"
+ },
+ "source": [
+ "
"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "qmQOF7LIqSeI",
+ "colab_type": "code",
+ "outputId": "54ebb43f-1d54-4855-853f-178c3cb432dc",
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 35
+ }
+ },
+ "source": [
+ "from google.colab import drive\n",
+ "drive.mount('/content/gdrive')"
+ ],
+ "execution_count": 0,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "text": [
+ "Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount(\"/content/gdrive\", force_remount=True).\n"
+ ],
+ "name": "stdout"
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "yvVHvIcFqW6i",
+ "colab_type": "code",
+ "outputId": "38033657-1d6a-4999-c6eb-f48cab2dd14f",
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 74
+ }
+ },
+ "source": [
+ "import pandas as pd\n",
+ "import numpy as np\n",
+ "import os\n",
+ "from sklearn.model_selection import train_test_split\n",
+ "from sklearn.ensemble import RandomForestClassifier\n",
+ "from sklearn.linear_model import LogisticRegression\n",
+ "from sklearn.ensemble import ExtraTreesClassifier\n",
+ "from sklearn import tree\n",
+ "from sklearn import svm\n",
+ "from sklearn.ensemble import GradientBoostingClassifier\n",
+ "from sklearn.metrics import accuracy_score\n",
+ "from sklearn.preprocessing import LabelEncoder\n",
+ "from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, classification_report\n",
+ "from imblearn.over_sampling import SMOTE\n",
+ "from imblearn.under_sampling import RandomUnderSampler\n",
+ "from google.colab import files"
+ ],
+ "execution_count": 0,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "text": [
+ "/usr/local/lib/python3.6/dist-packages/sklearn/externals/six.py:31: DeprecationWarning: The module is deprecated in version 0.21 and will be removed in version 0.23 since we've dropped support for Python 2.7. Please rely on the official version of six (https://pypi.org/project/six/).\n",
+ " \"(https://pypi.org/project/six/).\", DeprecationWarning)\n"
+ ],
+ "name": "stderr"
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "tPr-qj5cqj_T",
+ "colab_type": "code",
+ "colab": {}
+ },
+ "source": [
+ "df=pd.read_csv('/content/gdrive/My Drive/1.csv')"
+ ],
+ "execution_count": 0,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "dTvIVJcWw6Nf",
+ "colab_type": "code",
+ "outputId": "24eb7918-16a0-4116-9076-aefa570b6bce",
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 305
+ }
+ },
+ "source": [
+ "df.head()"
+ ],
+ "execution_count": 0,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Cloudcover | \n",
+ " DewPointF | \n",
+ " Humidity | \n",
+ " Pressure | \n",
+ " Visibilty | \n",
+ " WeatherCode | \n",
+ " WindChillF | \n",
+ " WindDirDegree | \n",
+ " WindGustKmph | \n",
+ " WindSpeedKmph | \n",
+ " airport | \n",
+ " date | \n",
+ " precipMM | \n",
+ " tempF | \n",
+ " time | \n",
+ " Year | \n",
+ " Quarter | \n",
+ " Month | \n",
+ " DayofMonth | \n",
+ " FlightDate | \n",
+ " OriginAirportID | \n",
+ " Origin | \n",
+ " DestAirportID | \n",
+ " Dest | \n",
+ " CRSDepTime | \n",
+ " DepTime | \n",
+ " DepDelayMinutes | \n",
+ " DepDel15 | \n",
+ " CRSArrTime | \n",
+ " ArrTime | \n",
+ " ArrDelayMinutes | \n",
+ " ArrDel15 | \n",
+ " nearest_hoursArr | \n",
+ " nearest_hoursDep | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 57 | \n",
+ " 70 | \n",
+ " 89 | \n",
+ " 1021 | \n",
+ " 9 | \n",
+ " 176 | \n",
+ " 74 | \n",
+ " 147 | \n",
+ " 10 | \n",
+ " 5 | \n",
+ " MCO | \n",
+ " 2016-01-01 | \n",
+ " 2.5 | \n",
+ " 74 | \n",
+ " 0 | \n",
+ " 2016 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " 2016-01-01 | \n",
+ " 13204 | \n",
+ " MCO | \n",
+ " 13303 | \n",
+ " MIA | \n",
+ " 1630 | \n",
+ " 0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 1739 | \n",
+ " 0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 87 | \n",
+ " 70 | \n",
+ " 97 | \n",
+ " 1019 | \n",
+ " 6 | \n",
+ " 302 | \n",
+ " 71 | \n",
+ " 226 | \n",
+ " 7 | \n",
+ " 4 | \n",
+ " MCO | \n",
+ " 2016-01-01 | \n",
+ " 3.4 | \n",
+ " 71 | \n",
+ " 500 | \n",
+ " 2016 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " 2016-01-01 | \n",
+ " 13204 | \n",
+ " MCO | \n",
+ " 13303 | \n",
+ " MIA | \n",
+ " 500 | \n",
+ " 503 | \n",
+ " 3.0 | \n",
+ " 0.0 | \n",
+ " 603 | \n",
+ " 559 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 600 | \n",
+ " 500 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 87 | \n",
+ " 70 | \n",
+ " 97 | \n",
+ " 1019 | \n",
+ " 6 | \n",
+ " 302 | \n",
+ " 71 | \n",
+ " 226 | \n",
+ " 7 | \n",
+ " 4 | \n",
+ " MCO | \n",
+ " 2016-01-01 | \n",
+ " 3.4 | \n",
+ " 71 | \n",
+ " 500 | \n",
+ " 2016 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " 2016-01-01 | \n",
+ " 13204 | \n",
+ " MCO | \n",
+ " 11618 | \n",
+ " EWR | \n",
+ " 530 | \n",
+ " 527 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 759 | \n",
+ " 741 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 800 | \n",
+ " 500 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 84 | \n",
+ " 71 | \n",
+ " 97 | \n",
+ " 1019 | \n",
+ " 5 | \n",
+ " 302 | \n",
+ " 71 | \n",
+ " 240 | \n",
+ " 6 | \n",
+ " 3 | \n",
+ " MCO | \n",
+ " 2016-01-01 | \n",
+ " 2.1 | \n",
+ " 72 | \n",
+ " 600 | \n",
+ " 2016 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " 2016-01-01 | \n",
+ " 13204 | \n",
+ " MCO | \n",
+ " 11298 | \n",
+ " DFW | \n",
+ " 600 | \n",
+ " 612 | \n",
+ " 12.0 | \n",
+ " 0.0 | \n",
+ " 801 | \n",
+ " 800 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 800 | \n",
+ " 600 | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " 84 | \n",
+ " 71 | \n",
+ " 97 | \n",
+ " 1019 | \n",
+ " 5 | \n",
+ " 302 | \n",
+ " 71 | \n",
+ " 240 | \n",
+ " 6 | \n",
+ " 3 | \n",
+ " MCO | \n",
+ " 2016-01-01 | \n",
+ " 2.1 | \n",
+ " 72 | \n",
+ " 600 | \n",
+ " 2016 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " 2016-01-01 | \n",
+ " 13204 | \n",
+ " MCO | \n",
+ " 12478 | \n",
+ " JFK | \n",
+ " 604 | \n",
+ " 625 | \n",
+ " 21.0 | \n",
+ " 1.0 | \n",
+ " 830 | \n",
+ " 850 | \n",
+ " 20.0 | \n",
+ " 1.0 | \n",
+ " 900 | \n",
+ " 600 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Cloudcover DewPointF ... nearest_hoursArr nearest_hoursDep\n",
+ "0 57 70 ... 0 0\n",
+ "1 87 70 ... 600 500\n",
+ "2 87 70 ... 800 500\n",
+ "3 84 71 ... 800 600\n",
+ "4 84 71 ... 900 600\n",
+ "\n",
+ "[5 rows x 34 columns]"
+ ]
+ },
+ "metadata": {
+ "tags": []
+ },
+ "execution_count": 6
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "y-IaZ6c_xOk4",
+ "colab_type": "code",
+ "colab": {}
+ },
+ "source": [
+ "pf1=df.drop(['nearest_hoursArr', 'nearest_hoursDep'], axis=1)"
+ ],
+ "execution_count": 0,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "txHM39lKxOnw",
+ "colab_type": "code",
+ "outputId": "f2216878-066d-4510-8a12-f2f0ccb133e9",
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 305
+ }
+ },
+ "source": [
+ "pf1.head()"
+ ],
+ "execution_count": 0,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Cloudcover | \n",
+ " DewPointF | \n",
+ " Humidity | \n",
+ " Pressure | \n",
+ " Visibilty | \n",
+ " WeatherCode | \n",
+ " WindChillF | \n",
+ " WindDirDegree | \n",
+ " WindGustKmph | \n",
+ " WindSpeedKmph | \n",
+ " airport | \n",
+ " date | \n",
+ " precipMM | \n",
+ " tempF | \n",
+ " time | \n",
+ " Year | \n",
+ " Quarter | \n",
+ " Month | \n",
+ " DayofMonth | \n",
+ " FlightDate | \n",
+ " OriginAirportID | \n",
+ " Origin | \n",
+ " DestAirportID | \n",
+ " Dest | \n",
+ " CRSDepTime | \n",
+ " DepTime | \n",
+ " DepDelayMinutes | \n",
+ " DepDel15 | \n",
+ " CRSArrTime | \n",
+ " ArrTime | \n",
+ " ArrDelayMinutes | \n",
+ " ArrDel15 | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 57 | \n",
+ " 70 | \n",
+ " 89 | \n",
+ " 1021 | \n",
+ " 9 | \n",
+ " 176 | \n",
+ " 74 | \n",
+ " 147 | \n",
+ " 10 | \n",
+ " 5 | \n",
+ " MCO | \n",
+ " 2016-01-01 | \n",
+ " 2.5 | \n",
+ " 74 | \n",
+ " 0 | \n",
+ " 2016 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " 2016-01-01 | \n",
+ " 13204 | \n",
+ " MCO | \n",
+ " 13303 | \n",
+ " MIA | \n",
+ " 1630 | \n",
+ " 0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 1739 | \n",
+ " 0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 87 | \n",
+ " 70 | \n",
+ " 97 | \n",
+ " 1019 | \n",
+ " 6 | \n",
+ " 302 | \n",
+ " 71 | \n",
+ " 226 | \n",
+ " 7 | \n",
+ " 4 | \n",
+ " MCO | \n",
+ " 2016-01-01 | \n",
+ " 3.4 | \n",
+ " 71 | \n",
+ " 500 | \n",
+ " 2016 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " 2016-01-01 | \n",
+ " 13204 | \n",
+ " MCO | \n",
+ " 13303 | \n",
+ " MIA | \n",
+ " 500 | \n",
+ " 503 | \n",
+ " 3.0 | \n",
+ " 0.0 | \n",
+ " 603 | \n",
+ " 559 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 87 | \n",
+ " 70 | \n",
+ " 97 | \n",
+ " 1019 | \n",
+ " 6 | \n",
+ " 302 | \n",
+ " 71 | \n",
+ " 226 | \n",
+ " 7 | \n",
+ " 4 | \n",
+ " MCO | \n",
+ " 2016-01-01 | \n",
+ " 3.4 | \n",
+ " 71 | \n",
+ " 500 | \n",
+ " 2016 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " 2016-01-01 | \n",
+ " 13204 | \n",
+ " MCO | \n",
+ " 11618 | \n",
+ " EWR | \n",
+ " 530 | \n",
+ " 527 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 759 | \n",
+ " 741 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 84 | \n",
+ " 71 | \n",
+ " 97 | \n",
+ " 1019 | \n",
+ " 5 | \n",
+ " 302 | \n",
+ " 71 | \n",
+ " 240 | \n",
+ " 6 | \n",
+ " 3 | \n",
+ " MCO | \n",
+ " 2016-01-01 | \n",
+ " 2.1 | \n",
+ " 72 | \n",
+ " 600 | \n",
+ " 2016 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " 2016-01-01 | \n",
+ " 13204 | \n",
+ " MCO | \n",
+ " 11298 | \n",
+ " DFW | \n",
+ " 600 | \n",
+ " 612 | \n",
+ " 12.0 | \n",
+ " 0.0 | \n",
+ " 801 | \n",
+ " 800 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " 84 | \n",
+ " 71 | \n",
+ " 97 | \n",
+ " 1019 | \n",
+ " 5 | \n",
+ " 302 | \n",
+ " 71 | \n",
+ " 240 | \n",
+ " 6 | \n",
+ " 3 | \n",
+ " MCO | \n",
+ " 2016-01-01 | \n",
+ " 2.1 | \n",
+ " 72 | \n",
+ " 600 | \n",
+ " 2016 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " 2016-01-01 | \n",
+ " 13204 | \n",
+ " MCO | \n",
+ " 12478 | \n",
+ " JFK | \n",
+ " 604 | \n",
+ " 625 | \n",
+ " 21.0 | \n",
+ " 1.0 | \n",
+ " 830 | \n",
+ " 850 | \n",
+ " 20.0 | \n",
+ " 1.0 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Cloudcover DewPointF Humidity ... ArrTime ArrDelayMinutes ArrDel15\n",
+ "0 57 70 89 ... 0 0.0 0.0\n",
+ "1 87 70 97 ... 559 0.0 0.0\n",
+ "2 87 70 97 ... 741 0.0 0.0\n",
+ "3 84 71 97 ... 800 0.0 0.0\n",
+ "4 84 71 97 ... 850 20.0 1.0\n",
+ "\n",
+ "[5 rows x 32 columns]"
+ ]
+ },
+ "metadata": {
+ "tags": []
+ },
+ "execution_count": 8
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "stzNOS1TwzG5",
+ "colab_type": "code",
+ "outputId": "53a62a3e-c98d-4c40-bf4b-d1459af9cd90",
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 35
+ }
+ },
+ "source": [
+ "pf1.shape"
+ ],
+ "execution_count": 0,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "(1438104, 32)"
+ ]
+ },
+ "metadata": {
+ "tags": []
+ },
+ "execution_count": 9
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "Vhff2XcxymX3",
+ "colab_type": "code",
+ "colab": {}
+ },
+ "source": [
+ "need=pf1.drop([\"airport\",\"date\",\"DepTime\",\"ArrTime\",\"DepDelayMinutes\",\"ArrDelayMinutes\",\"ArrDel15\",\"DepDel15\",\"FlightDate\"],axis=1)"
+ ],
+ "execution_count": 0,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "UO_eAKHD4y0l",
+ "colab_type": "code",
+ "colab": {}
+ },
+ "source": [
+ "k = LabelEncoder()\n",
+ "c = k.fit_transform(need[\"Dest\"])\n",
+ "need[\"Dest\"] = c\n",
+ "c = k.fit_transform(need[\"Origin\"])\n",
+ "need[\"Origin\"] = c\n",
+ "c = k.fit_transform(need[\"WeatherCode\"])\n",
+ "need[\"WeatherCode\"] = c\n",
+ "f=np.asarray(need)"
+ ],
+ "execution_count": 0,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "mFpWvBj6x6lC",
+ "colab_type": "code",
+ "outputId": "90df5805-cd26-4e28-f606-28b921a681d9",
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 35
+ }
+ },
+ "source": [
+ "pf1['DepDel15']=pf1.DepDel15.astype(int)\n",
+ "l=np.asarray(pf1[\"DepDel15\"])\n",
+ "l"
+ ],
+ "execution_count": 0,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "array([0, 0, 0, ..., 1, 0, 0])"
+ ]
+ },
+ "metadata": {
+ "tags": []
+ },
+ "execution_count": 8
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "MFFaA4VE6cps",
+ "colab_type": "code",
+ "colab": {}
+ },
+ "source": [
+ "need.to_csv(\"/content/gdrive/My Drive/4.csv\")"
+ ],
+ "execution_count": 0,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "7Fd3ODR385nW",
+ "colab_type": "code",
+ "colab": {}
+ },
+ "source": [
+ "f_train, f_test, l_train, l_test = train_test_split(f, l, test_size=0.20, random_state=42)"
+ ],
+ "execution_count": 0,
+ "outputs": []
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "REiK6VhAJ3U_",
+ "colab_type": "text"
+ },
+ "source": [
+ "**EXTRA TREES**"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "bNgm8RKUKsJc",
+ "colab_type": "code",
+ "outputId": "31a92513-61b5-40bd-ee41-af339cd474af",
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 74
+ }
+ },
+ "source": [
+ "#before sampling\n",
+ "et = ExtraTreesClassifier()\n",
+ "et.fit(f_train,l_train)\n"
+ ],
+ "execution_count": 0,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "text": [
+ "/usr/local/lib/python3.6/dist-packages/sklearn/ensemble/forest.py:245: FutureWarning: The default value of n_estimators will change from 10 in version 0.20 to 100 in 0.22.\n",
+ " \"10 in version 0.20 to 100 in 0.22.\", FutureWarning)\n"
+ ],
+ "name": "stderr"
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "dCZJfFgSMhAa",
+ "colab_type": "code",
+ "outputId": "d44111d6-ba10-464c-968c-267835fac336",
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 244
+ }
+ },
+ "source": [
+ "pred = et.predict(f_test)\n",
+ "precision = precision_score(l_test, pred, average=\"weighted\")\n",
+ "recall = recall_score(l_test, pred, average=\"weighted\")\n",
+ "f1 = f1_score(l_test, pred, average=\"weighted\")\n",
+ "print (\"Precision:\", precision)\n",
+ "print (\"Recall:\", recall)\n",
+ "print (\"F1 Score:\", f1)"
+ ],
+ "execution_count": 0,
+ "outputs": [
+ {
+ "output_type": "error",
+ "ename": "NameError",
+ "evalue": "ignored",
+ "traceback": [
+ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+ "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)",
+ "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mpred\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0met\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpredict\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mf_test\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2\u001b[0m \u001b[0mprecision\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mprecision_score\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0ml_test\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mpred\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0maverage\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m\"weighted\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0mrecall\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mrecall_score\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0ml_test\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mpred\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0maverage\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m\"weighted\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0mf1\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mf1_score\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0ml_test\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mpred\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0maverage\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m\"weighted\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0mprint\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0;34m\"Precision:\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mprecision\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+ "\u001b[0;31mNameError\u001b[0m: name 'et' is not defined"
+ ]
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "51QiB7EMM51I",
+ "colab_type": "code",
+ "outputId": "204d38a3-8704-4db8-96ee-6a3c3d48b5ee",
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 181
+ }
+ },
+ "source": [
+ "print(classification_report(l_test, pred))"
+ ],
+ "execution_count": 0,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "text": [
+ " precision recall f1-score support\n",
+ "\n",
+ " 0 0.87 0.95 0.91 228878\n",
+ " 1 0.71 0.45 0.55 58743\n",
+ "\n",
+ " accuracy 0.85 287621\n",
+ " macro avg 0.79 0.70 0.73 287621\n",
+ "weighted avg 0.84 0.85 0.84 287621\n",
+ "\n"
+ ],
+ "name": "stdout"
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "FWhcs0K2NYUh",
+ "colab_type": "code",
+ "outputId": "848d272e-9fe3-4823-c1f7-05981e399ba1",
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 292
+ }
+ },
+ "source": [
+ "# Oversampled (SMOTE) output for ExtraTrees.\n",
+ "# Split first, then oversample ONLY the training fold. Running SMOTE before the\n",
+ "# split lets synthetic points built from test rows end up in the training data\n",
+ "# (and vice versa), which inflates every score reported below.\n",
+ "f_train, f_test, l_train, l_test = train_test_split(f, l, test_size=0.20, random_state=42)\n",
+ "sos = SMOTE(random_state=42)\n",
+ "x, y = sos.fit_resample(f_train, l_train)\n",
+ "rg = ExtraTreesClassifier()\n",
+ "rg.fit(x, y)\n",
+ "# Evaluate on the untouched test fold, which keeps the real class distribution.\n",
+ "pred = rg.predict(f_test)\n",
+ "precision = precision_score(l_test, pred, average=\"weighted\")\n",
+ "recall = recall_score(l_test, pred, average=\"weighted\")\n",
+ "f1 = f1_score(l_test, pred, average=\"weighted\")\n",
+ "print (\"Precision:\", precision)\n",
+ "print (\"Recall:\", recall)\n",
+ "print (\"F1 Score:\", f1)\n",
+ "print(classification_report(l_test, pred))"
+ ],
+ "execution_count": 0,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "text": [
+ "/usr/local/lib/python3.6/dist-packages/sklearn/ensemble/forest.py:245: FutureWarning: The default value of n_estimators will change from 10 in version 0.20 to 100 in 0.22.\n",
+ " \"10 in version 0.20 to 100 in 0.22.\", FutureWarning)\n"
+ ],
+ "name": "stderr"
+ },
+ {
+ "output_type": "stream",
+ "text": [
+ "Precision: 0.9068624599831894\n",
+ "Recall: 0.904870251810186\n",
+ "F1 Score: 0.9047613202127298\n",
+ " precision recall f1-score support\n",
+ "\n",
+ " 0 0.88 0.94 0.91 228508\n",
+ " 1 0.94 0.87 0.90 229456\n",
+ "\n",
+ " accuracy 0.90 457964\n",
+ " macro avg 0.91 0.90 0.90 457964\n",
+ "weighted avg 0.91 0.90 0.90 457964\n",
+ "\n"
+ ],
+ "name": "stdout"
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "_l-8802v1XcA",
+ "colab_type": "code",
+ "outputId": "6f1ba080-41ec-4841-88f8-6218ac580653",
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 292
+ }
+ },
+ "source": [
+ "\n",
+ "# Random undersampling for ExtraTrees.\n",
+ "# Split first and undersample ONLY the training fold, so the model is scored on\n",
+ "# a test set that keeps the original class imbalance instead of a balanced one.\n",
+ "f_train, f_test, l_train, l_test = train_test_split(f, l, test_size=0.20, random_state=42)\n",
+ "ru = RandomUnderSampler(random_state=42)\n",
+ "x, y = ru.fit_resample(f_train, l_train)\n",
+ "r = ExtraTreesClassifier()\n",
+ "r.fit(x, y)\n",
+ "pred = r.predict(f_test)\n",
+ "precision = precision_score(l_test, pred, average=\"weighted\")\n",
+ "recall = recall_score(l_test, pred, average=\"weighted\")\n",
+ "f1 = f1_score(l_test, pred, average=\"weighted\")\n",
+ "print (\"Precision:\", precision)\n",
+ "print (\"Recall:\", recall)\n",
+ "print (\"F1 Score:\", f1)\n",
+ "print(classification_report(l_test, pred))"
+ ],
+ "execution_count": 0,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "text": [
+ "/usr/local/lib/python3.6/dist-packages/sklearn/ensemble/forest.py:245: FutureWarning: The default value of n_estimators will change from 10 in version 0.20 to 100 in 0.22.\n",
+ " \"10 in version 0.20 to 100 in 0.22.\", FutureWarning)\n"
+ ],
+ "name": "stderr"
+ },
+ {
+ "output_type": "stream",
+ "text": [
+ "Precision: 0.7472095814587808\n",
+ "Recall: 0.7444448234110405\n",
+ "F1 Score: 0.7437519324824997\n",
+ " precision recall f1-score support\n",
+ "\n",
+ " 0 0.72 0.80 0.76 58571\n",
+ " 1 0.77 0.69 0.73 58707\n",
+ "\n",
+ " accuracy 0.74 117278\n",
+ " macro avg 0.75 0.74 0.74 117278\n",
+ "weighted avg 0.75 0.74 0.74 117278\n",
+ "\n"
+ ],
+ "name": "stdout"
+ }
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "n4zyuBjQJgLA",
+ "colab_type": "text"
+ },
+ "source": [
+ "**RANDOM FOREST**"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "dOFgV7AT9b7B",
+ "colab_type": "code",
+ "outputId": "b4f072de-5cd4-4c42-8810-febbaf141067",
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 201
+ }
+ },
+ "source": [
+ "# Before sampling: rebuild the split from the original (unsampled) arrays,\n",
+ "# because earlier resampling cells reassign f_train/f_test/l_train/l_test.\n",
+ "# The fixed random_state reproduces the same split as the first split cell.\n",
+ "f_train, f_test, l_train, l_test = train_test_split(f, l, test_size=0.20, random_state=42)\n",
+ "ran = RandomForestClassifier()\n",
+ "ran.fit(f_train,l_train)\n"
+ ],
+ "execution_count": 0,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "text": [
+ "/usr/local/lib/python3.6/dist-packages/sklearn/ensemble/forest.py:245: FutureWarning: The default value of n_estimators will change from 10 in version 0.20 to 100 in 0.22.\n",
+ " \"10 in version 0.20 to 100 in 0.22.\", FutureWarning)\n"
+ ],
+ "name": "stderr"
+ },
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',\n",
+ " max_depth=None, max_features='auto', max_leaf_nodes=None,\n",
+ " min_impurity_decrease=0.0, min_impurity_split=None,\n",
+ " min_samples_leaf=1, min_samples_split=2,\n",
+ " min_weight_fraction_leaf=0.0, n_estimators=10,\n",
+ " n_jobs=None, oob_score=False, random_state=None,\n",
+ " verbose=0, warm_start=False)"
+ ]
+ },
+ "metadata": {
+ "tags": []
+ },
+ "execution_count": 18
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "0b4C4gYb90kF",
+ "colab_type": "code",
+ "outputId": "f266aa34-dac5-4142-e4d6-c910f13ed814",
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 72
+ }
+ },
+ "source": [
+ "pred = ran.predict(f_test)\n",
+ "precision = precision_score(l_test, pred, average=\"weighted\")\n",
+ "recall = recall_score(l_test, pred, average=\"weighted\")\n",
+ "f1 = f1_score(l_test, pred, average=\"weighted\")\n",
+ "print (\"Precision:\", precision)\n",
+ "print (\"Recall:\", recall)\n",
+ "print (\"F1 Score:\", f1)"
+ ],
+ "execution_count": 0,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "text": [
+ "Precision: 0.7908160255096451\n",
+ "Recall: 0.7880079810365115\n",
+ "F1 Score: 0.7875114091747006\n"
+ ],
+ "name": "stdout"
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "6lfk_-QPNBSp",
+ "colab_type": "code",
+ "outputId": "258d7f63-0288-47f1-8381-14cd5542d576",
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 292
+ }
+ },
+ "source": [
+ "# Oversampled (SMOTE) output for RandomForest.\n",
+ "# Split first, then oversample ONLY the training fold. Running SMOTE before the\n",
+ "# split lets synthetic points built from test rows end up in the training data\n",
+ "# (and vice versa), which inflates every score reported below.\n",
+ "f_train, f_test, l_train, l_test = train_test_split(f, l, test_size=0.20, random_state=42)\n",
+ "sos = SMOTE(random_state=42)\n",
+ "x, y = sos.fit_resample(f_train, l_train)\n",
+ "rg = RandomForestClassifier()\n",
+ "rg.fit(x, y)\n",
+ "# Evaluate on the untouched test fold, which keeps the real class distribution.\n",
+ "pred = rg.predict(f_test)\n",
+ "precision = precision_score(l_test, pred, average=\"weighted\")\n",
+ "recall = recall_score(l_test, pred, average=\"weighted\")\n",
+ "f1 = f1_score(l_test, pred, average=\"weighted\")\n",
+ "print (\"Precision:\", precision)\n",
+ "print (\"Recall:\", recall)\n",
+ "print (\"F1 Score:\", f1)\n",
+ "print(classification_report(l_test, pred))"
+ ],
+ "execution_count": 0,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "text": [
+ "/usr/local/lib/python3.6/dist-packages/sklearn/ensemble/forest.py:245: FutureWarning: The default value of n_estimators will change from 10 in version 0.20 to 100 in 0.22.\n",
+ " \"10 in version 0.20 to 100 in 0.22.\", FutureWarning)\n"
+ ],
+ "name": "stderr"
+ },
+ {
+ "output_type": "stream",
+ "text": [
+ "Precision: 0.9146826332086521\n",
+ "Recall: 0.9114886759657964\n",
+ "F1 Score: 0.9113267780959348\n",
+ " precision recall f1-score support\n",
+ "\n",
+ " 0 0.88 0.96 0.92 228508\n",
+ " 1 0.95 0.87 0.91 229456\n",
+ "\n",
+ " accuracy 0.91 457964\n",
+ " macro avg 0.91 0.91 0.91 457964\n",
+ "weighted avg 0.91 0.91 0.91 457964\n",
+ "\n"
+ ],
+ "name": "stdout"
+ }
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "IQyqICuh4Rug",
+ "colab_type": "text"
+ },
+ "source": [
+ "**LOGISTIC REGRESSION**"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "JAXLCGcg4FPm",
+ "colab_type": "code",
+ "outputId": "3e574e8a-6b7e-4389-f615-884e7eeae5af",
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 165
+ }
+ },
+ "source": [
+ "# Before sampling: rebuild the split from the original (unsampled) arrays,\n",
+ "# because earlier resampling cells reassign f_train/f_test/l_train/l_test.\n",
+ "# The fixed random_state reproduces the same split as the first split cell.\n",
+ "f_train, f_test, l_train, l_test = train_test_split(f, l, test_size=0.20, random_state=42)\n",
+ "lg = LogisticRegression()\n",
+ "lg.fit(f_train,l_train)\n"
+ ],
+ "execution_count": 0,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "text": [
+ "/usr/local/lib/python3.6/dist-packages/sklearn/linear_model/logistic.py:432: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n",
+ " FutureWarning)\n"
+ ],
+ "name": "stderr"
+ },
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,\n",
+ " intercept_scaling=1, l1_ratio=None, max_iter=100,\n",
+ " multi_class='warn', n_jobs=None, penalty='l2',\n",
+ " random_state=None, solver='warn', tol=0.0001, verbose=0,\n",
+ " warm_start=False)"
+ ]
+ },
+ "metadata": {
+ "tags": []
+ },
+ "execution_count": 21
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "1SfMUDnJ4ytt",
+ "colab_type": "code",
+ "outputId": "bfd786cf-17ce-4cd9-f90c-34de53419368",
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 72
+ }
+ },
+ "source": [
+ "pred = lg.predict(f_test)\n",
+ "precision = precision_score(l_test, pred, average=\"weighted\")\n",
+ "recall = recall_score(l_test, pred, average=\"weighted\")\n",
+ "f1 = f1_score(l_test, pred, average=\"weighted\")\n",
+ "print (\"Precision:\", precision)\n",
+ "print (\"Recall:\", recall)\n",
+ "print (\"F1 Score:\", f1)"
+ ],
+ "execution_count": 0,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "text": [
+ "Precision: 0.6153303766290811\n",
+ "Recall: 0.6150046728563817\n",
+ "F1 Score: 0.6146723066459795\n"
+ ],
+ "name": "stdout"
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "MFpprCUv5IFC",
+ "colab_type": "code",
+ "outputId": "0b67b00e-edb5-409e-d633-2d42ba5ca8d3",
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 292
+ }
+ },
+ "source": [
+ "# Undersampled output for LogisticRegression.\n",
+ "# Split first and undersample ONLY the training fold, so the model is scored on\n",
+ "# a test set that keeps the original class imbalance instead of a balanced one.\n",
+ "f_train, f_test, l_train, l_test = train_test_split(f, l, test_size=0.20, random_state=42)\n",
+ "rus = RandomUnderSampler(random_state=42)\n",
+ "x, y = rus.fit_resample(f_train, l_train)\n",
+ "lg = LogisticRegression()\n",
+ "lg.fit(x, y)\n",
+ "# Predict with the model fitted in this cell (lg). The previous code called\n",
+ "# rg.predict, i.e. a classifier left over from an earlier cell, so the metrics\n",
+ "# printed here did not describe logistic regression at all.\n",
+ "pred = lg.predict(f_test)\n",
+ "precision = precision_score(l_test, pred, average=\"weighted\")\n",
+ "recall = recall_score(l_test, pred, average=\"weighted\")\n",
+ "f1 = f1_score(l_test, pred, average=\"weighted\")\n",
+ "print (\"Precision:\", precision)\n",
+ "print (\"Recall:\", recall)\n",
+ "print (\"F1 Score:\", f1)\n",
+ "print(classification_report(l_test, pred))"
+ ],
+ "execution_count": 0,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "text": [
+ "/usr/local/lib/python3.6/dist-packages/sklearn/linear_model/logistic.py:432: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n",
+ " FutureWarning)\n"
+ ],
+ "name": "stderr"
+ },
+ {
+ "output_type": "stream",
+ "text": [
+ "Precision: 0.934776416365854\n",
+ "Recall: 0.9280427701700233\n",
+ "F1 Score: 0.9277686765240396\n",
+ " precision recall f1-score support\n",
+ "\n",
+ " 0 0.88 0.99 0.93 58571\n",
+ " 1 0.99 0.87 0.92 58707\n",
+ "\n",
+ " accuracy 0.93 117278\n",
+ " macro avg 0.93 0.93 0.93 117278\n",
+ "weighted avg 0.93 0.93 0.93 117278\n",
+ "\n"
+ ],
+ "name": "stdout"
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "YQMvmoss5mgK",
+ "colab_type": "code",
+ "outputId": "5f39c5ce-483c-4b18-e448-94e9ac6e5abf",
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 292
+ }
+ },
+ "source": [
+ "# Oversampled (SMOTE) output for LogisticRegression.\n",
+ "# Split first, then oversample ONLY the training fold. Running SMOTE before the\n",
+ "# split lets synthetic points built from test rows end up in the training data\n",
+ "# (and vice versa), which inflates every score reported below.\n",
+ "f_train, f_test, l_train, l_test = train_test_split(f, l, test_size=0.20, random_state=42)\n",
+ "sos = SMOTE(random_state=42)\n",
+ "x, y = sos.fit_resample(f_train, l_train)\n",
+ "lg = LogisticRegression()\n",
+ "lg.fit(x, y)\n",
+ "# Predict with the model fitted in this cell (lg). The previous code called\n",
+ "# rg.predict, i.e. a classifier left over from an earlier cell, so the metrics\n",
+ "# printed here did not describe logistic regression at all.\n",
+ "pred = lg.predict(f_test)\n",
+ "precision = precision_score(l_test, pred, average=\"weighted\")\n",
+ "recall = recall_score(l_test, pred, average=\"weighted\")\n",
+ "f1 = f1_score(l_test, pred, average=\"weighted\")\n",
+ "print (\"Precision:\", precision)\n",
+ "print (\"Recall:\", recall)\n",
+ "print (\"F1 Score:\", f1)\n",
+ "print(classification_report(l_test, pred))"
+ ],
+ "execution_count": 0,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "text": [
+ "/usr/local/lib/python3.6/dist-packages/sklearn/linear_model/logistic.py:432: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n",
+ " FutureWarning)\n"
+ ],
+ "name": "stderr"
+ },
+ {
+ "output_type": "stream",
+ "text": [
+ "Precision: 0.9146826332086521\n",
+ "Recall: 0.9114886759657964\n",
+ "F1 Score: 0.9113267780959348\n",
+ " precision recall f1-score support\n",
+ "\n",
+ " 0 0.88 0.96 0.92 228508\n",
+ " 1 0.95 0.87 0.91 229456\n",
+ "\n",
+ " accuracy 0.91 457964\n",
+ " macro avg 0.91 0.91 0.91 457964\n",
+ "weighted avg 0.91 0.91 0.91 457964\n",
+ "\n"
+ ],
+ "name": "stdout"
+ }
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "HJdqEtOI5_Dc",
+ "colab_type": "text"
+ },
+ "source": [
+ "**DECISION TREE**"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "pBb344bD6X7C",
+ "colab_type": "code",
+ "outputId": "2fb2420a-a225-410f-a1ff-187e0f12fc9a",
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 126
+ }
+ },
+ "source": [
+ "# Before sampling: rebuild the split from the original (unsampled) arrays,\n",
+ "# because earlier resampling cells reassign f_train/f_test/l_train/l_test.\n",
+ "# The fixed random_state reproduces the same split as the first split cell.\n",
+ "f_train, f_test, l_train, l_test = train_test_split(f, l, test_size=0.20, random_state=42)\n",
+ "dg = tree.DecisionTreeClassifier()\n",
+ "dg.fit(f_train,l_train)\n"
+ ],
+ "execution_count": 0,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=None,\n",
+ " max_features=None, max_leaf_nodes=None,\n",
+ " min_impurity_decrease=0.0, min_impurity_split=None,\n",
+ " min_samples_leaf=1, min_samples_split=2,\n",
+ " min_weight_fraction_leaf=0.0, presort=False,\n",
+ " random_state=None, splitter='best')"
+ ]
+ },
+ "metadata": {
+ "tags": []
+ },
+ "execution_count": 25
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "6v0jyL876pXi",
+ "colab_type": "code",
+ "outputId": "0c1a6773-ca62-4079-ebb1-24a35146f07b",
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 72
+ }
+ },
+ "source": [
+ "# Evaluate the fitted decision tree on the current test split; every metric uses average=\"weighted\".\n",
+ "pred = dg.predict(f_test)\n",
+ "precision, recall, f1 = [scorer(l_test, pred, average=\"weighted\")\n",
+ "                         for scorer in (precision_score, recall_score, f1_score)]\n",
+ "print(\"Precision:\", precision)\n",
+ "print(\"Recall:\", recall)\n",
+ "print(\"F1 Score:\", f1)"
+ ],
+ "execution_count": 0,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "text": [
+ "Precision: 0.9099632535050411\n",
+ "Recall: 0.9099405193421317\n",
+ "F1 Score: 0.9099384765292666\n"
+ ],
+ "name": "stdout"
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "yWOMxf4l65o0",
+ "colab_type": "code",
+ "outputId": "8bfabf59-a1f8-4cc5-9301-6541ad09548a",
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 235
+ }
+ },
+ "source": [
+ "# undersampled output: split the raw data first and undersample only the training fold, so the test set keeps the real class balance\n",
+ "f_train, f_test, l_train, l_test = train_test_split(f, l, test_size=0.20, random_state=42)\n",
+ "rus = RandomUnderSampler(random_state=42)\n",
+ "x, y = rus.fit_resample(f_train, l_train)\n",
+ "dg = tree.DecisionTreeClassifier()\n",
+ "dg.fit(x, y)\n",
+ "pred = dg.predict(f_test)\n",
+ "precision = precision_score(l_test, pred, average=\"weighted\")\n",
+ "recall = recall_score(l_test, pred, average=\"weighted\")\n",
+ "f1 = f1_score(l_test, pred, average=\"weighted\")\n",
+ "print(\"Precision:\", precision)\n",
+ "print(\"Recall:\", recall)\n",
+ "print(\"F1 Score:\", f1)\n",
+ "print(classification_report(l_test, pred))"
+ ],
+ "execution_count": 0,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "text": [
+ "Precision: 0.7962277290798526\n",
+ "Recall: 0.7962277665035216\n",
+ "F1 Score: 0.796227680870621\n",
+ " precision recall f1-score support\n",
+ "\n",
+ " 0 0.80 0.80 0.80 58571\n",
+ " 1 0.80 0.80 0.80 58707\n",
+ "\n",
+ " accuracy 0.80 117278\n",
+ " macro avg 0.80 0.80 0.80 117278\n",
+ "weighted avg 0.80 0.80 0.80 117278\n",
+ "\n"
+ ],
+ "name": "stdout"
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "Z5mZjeS77RbE",
+ "colab_type": "code",
+ "outputId": "1fa6778c-c412-4c0e-ccbf-b85f4d2b9f1d",
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 235
+ }
+ },
+ "source": [
+ "# oversampled output: split the raw data first and run SMOTE only on the training fold, so no synthetic point is derived from (or lands in) the test set\n",
+ "f_train, f_test, l_train, l_test = train_test_split(f, l, test_size=0.20, random_state=42)\n",
+ "sos = SMOTE(random_state=42)\n",
+ "x, y = sos.fit_resample(f_train, l_train)\n",
+ "dg = tree.DecisionTreeClassifier()\n",
+ "dg.fit(x, y)\n",
+ "pred = dg.predict(f_test)\n",
+ "precision = precision_score(l_test, pred, average=\"weighted\")\n",
+ "recall = recall_score(l_test, pred, average=\"weighted\")\n",
+ "f1 = f1_score(l_test, pred, average=\"weighted\")\n",
+ "print(\"Precision:\", precision)\n",
+ "print(\"Recall:\", recall)\n",
+ "print(\"F1 Score:\", f1)\n",
+ "print(classification_report(l_test, pred))"
+ ],
+ "execution_count": 0,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "text": [
+ "Precision: 0.9095492394117647\n",
+ "Recall: 0.909527823147671\n",
+ "F1 Score: 0.9095258643003094\n",
+ " precision recall f1-score support\n",
+ "\n",
+ " 0 0.91 0.91 0.91 228508\n",
+ " 1 0.91 0.91 0.91 229456\n",
+ "\n",
+ " accuracy 0.91 457964\n",
+ " macro avg 0.91 0.91 0.91 457964\n",
+ "weighted avg 0.91 0.91 0.91 457964\n",
+ "\n"
+ ],
+ "name": "stdout"
+ }
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "BXF3EMUD9ix_",
+ "colab_type": "text"
+ },
+ "source": [
+ "**GRADIENT BOOSTING**"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "zVCw5IDa_IU3",
+ "colab_type": "code",
+ "colab": {}
+ },
+ "source": [
+ "f_train, f_test, l_train, l_test = train_test_split(f, l, test_size=0.20, random_state=42)  # before sampling: re-split the raw f/l, because the resampling cells rebind f_train/f_test\n",
+ "gb = GradientBoostingClassifier()\n",
+ "gb.fit(f_train,l_train)\n"
+ ],
+ "execution_count": 0,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "AjsubeaO_khM",
+ "colab_type": "code",
+ "colab": {}
+ },
+ "source": [
+ "# Evaluate the fitted gradient-boosting model on the current test split; every metric uses average=\"weighted\".\n",
+ "pred = gb.predict(f_test)\n",
+ "precision, recall, f1 = [scorer(l_test, pred, average=\"weighted\")\n",
+ "                         for scorer in (precision_score, recall_score, f1_score)]\n",
+ "print(\"Precision:\", precision)\n",
+ "print(\"Recall:\", recall)\n",
+ "print(\"F1 Score:\", f1)"
+ ],
+ "execution_count": 0,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "nXRgEHI7_nIi",
+ "colab_type": "code",
+ "colab": {}
+ },
+ "source": [
+ "# undersampled output: split the raw data first and undersample only the training fold, so the test set keeps the real class balance\n",
+ "f_train, f_test, l_train, l_test = train_test_split(f, l, test_size=0.20, random_state=42)\n",
+ "rus = RandomUnderSampler(random_state=42)\n",
+ "x, y = rus.fit_resample(f_train, l_train)\n",
+ "gb = GradientBoostingClassifier()\n",
+ "gb.fit(x, y)\n",
+ "pred = gb.predict(f_test)\n",
+ "precision = precision_score(l_test, pred, average=\"weighted\")\n",
+ "recall = recall_score(l_test, pred, average=\"weighted\")\n",
+ "f1 = f1_score(l_test, pred, average=\"weighted\")\n",
+ "print(\"Precision:\", precision)\n",
+ "print(\"Recall:\", recall)\n",
+ "print(\"F1 Score:\", f1)\n",
+ "print(classification_report(l_test, pred))"
+ ],
+ "execution_count": 0,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "3XVZ5-7D_1uC",
+ "colab_type": "code",
+ "colab": {}
+ },
+ "source": [
+ "# oversampled output: split the raw data first and run SMOTE only on the training fold, so no synthetic point is derived from (or lands in) the test set\n",
+ "f_train, f_test, l_train, l_test = train_test_split(f, l, test_size=0.20, random_state=42)\n",
+ "sos = SMOTE(random_state=42)\n",
+ "x, y = sos.fit_resample(f_train, l_train)\n",
+ "gb = GradientBoostingClassifier()\n",
+ "gb.fit(x, y)\n",
+ "pred = gb.predict(f_test)\n",
+ "precision = precision_score(l_test, pred, average=\"weighted\")\n",
+ "recall = recall_score(l_test, pred, average=\"weighted\")\n",
+ "f1 = f1_score(l_test, pred, average=\"weighted\")\n",
+ "print(\"Precision:\", precision)\n",
+ "print(\"Recall:\", recall)\n",
+ "print(\"F1 Score:\", f1)\n",
+ "print(classification_report(l_test, pred))"
+ ],
+ "execution_count": 0,
+ "outputs": []
+ }
+ ]
+}
\ No newline at end of file
From 1d12eaee247cbd2b66cced0ea4103d2cb337645c Mon Sep 17 00:00:00 2001
From: pmannil <45764461+pmannil@users.noreply.github.com>
Date: Tue, 23 Jul 2019 22:35:05 +0530
Subject: [PATCH 2/2] Delete module2.ipynb
---
module2.ipynb | 1637 -------------------------------------------------
1 file changed, 1637 deletions(-)
delete mode 100644 module2.ipynb
diff --git a/module2.ipynb b/module2.ipynb
deleted file mode 100644
index 573e3159..00000000
--- a/module2.ipynb
+++ /dev/null
@@ -1,1637 +0,0 @@
-{
- "nbformat": 4,
- "nbformat_minor": 0,
- "metadata": {
- "colab": {
- "name": "module2.ipynb",
- "version": "0.3.2",
- "provenance": [],
- "collapsed_sections": [],
- "include_colab_link": true
- },
- "kernelspec": {
- "name": "python3",
- "display_name": "Python 3"
- }
- },
- "cells": [
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "view-in-github",
- "colab_type": "text"
- },
- "source": [
- "
"
- ]
- },
- {
- "cell_type": "code",
- "metadata": {
- "id": "qmQOF7LIqSeI",
- "colab_type": "code",
- "outputId": "54ebb43f-1d54-4855-853f-178c3cb432dc",
- "colab": {
- "base_uri": "https://localhost:8080/",
- "height": 35
- }
- },
- "source": [
- "from google.colab import drive\n",
- "drive.mount('/content/gdrive')"
- ],
- "execution_count": 0,
- "outputs": [
- {
- "output_type": "stream",
- "text": [
- "Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount(\"/content/gdrive\", force_remount=True).\n"
- ],
- "name": "stdout"
- }
- ]
- },
- {
- "cell_type": "code",
- "metadata": {
- "id": "yvVHvIcFqW6i",
- "colab_type": "code",
- "outputId": "38033657-1d6a-4999-c6eb-f48cab2dd14f",
- "colab": {
- "base_uri": "https://localhost:8080/",
- "height": 74
- }
- },
- "source": [
- "import pandas as pd\n",
- "import numpy as np\n",
- "import os\n",
- "from sklearn.model_selection import train_test_split\n",
- "from sklearn.ensemble import RandomForestClassifier\n",
- "from sklearn.linear_model import LogisticRegression\n",
- "from sklearn.ensemble import ExtraTreesClassifier\n",
- "from sklearn import tree\n",
- "from sklearn import svm\n",
- "from sklearn.ensemble import GradientBoostingClassifier\n",
- "from sklearn.metrics import accuracy_score\n",
- "from sklearn.preprocessing import LabelEncoder\n",
- "from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, classification_report\n",
- "from imblearn.over_sampling import SMOTE\n",
- "from imblearn.under_sampling import RandomUnderSampler\n",
- "from google.colab import files"
- ],
- "execution_count": 0,
- "outputs": [
- {
- "output_type": "stream",
- "text": [
- "/usr/local/lib/python3.6/dist-packages/sklearn/externals/six.py:31: DeprecationWarning: The module is deprecated in version 0.21 and will be removed in version 0.23 since we've dropped support for Python 2.7. Please rely on the official version of six (https://pypi.org/project/six/).\n",
- " \"(https://pypi.org/project/six/).\", DeprecationWarning)\n"
- ],
- "name": "stderr"
- }
- ]
- },
- {
- "cell_type": "code",
- "metadata": {
- "id": "tPr-qj5cqj_T",
- "colab_type": "code",
- "colab": {}
- },
- "source": [
- "df=pd.read_csv('/content/gdrive/My Drive/1.csv')"
- ],
- "execution_count": 0,
- "outputs": []
- },
- {
- "cell_type": "code",
- "metadata": {
- "id": "dTvIVJcWw6Nf",
- "colab_type": "code",
- "outputId": "24eb7918-16a0-4116-9076-aefa570b6bce",
- "colab": {
- "base_uri": "https://localhost:8080/",
- "height": 305
- }
- },
- "source": [
- "df.head()"
- ],
- "execution_count": 0,
- "outputs": [
- {
- "output_type": "execute_result",
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " Cloudcover | \n",
- " DewPointF | \n",
- " Humidity | \n",
- " Pressure | \n",
- " Visibilty | \n",
- " WeatherCode | \n",
- " WindChillF | \n",
- " WindDirDegree | \n",
- " WindGustKmph | \n",
- " WindSpeedKmph | \n",
- " airport | \n",
- " date | \n",
- " precipMM | \n",
- " tempF | \n",
- " time | \n",
- " Year | \n",
- " Quarter | \n",
- " Month | \n",
- " DayofMonth | \n",
- " FlightDate | \n",
- " OriginAirportID | \n",
- " Origin | \n",
- " DestAirportID | \n",
- " Dest | \n",
- " CRSDepTime | \n",
- " DepTime | \n",
- " DepDelayMinutes | \n",
- " DepDel15 | \n",
- " CRSArrTime | \n",
- " ArrTime | \n",
- " ArrDelayMinutes | \n",
- " ArrDel15 | \n",
- " nearest_hoursArr | \n",
- " nearest_hoursDep | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " | 0 | \n",
- " 57 | \n",
- " 70 | \n",
- " 89 | \n",
- " 1021 | \n",
- " 9 | \n",
- " 176 | \n",
- " 74 | \n",
- " 147 | \n",
- " 10 | \n",
- " 5 | \n",
- " MCO | \n",
- " 2016-01-01 | \n",
- " 2.5 | \n",
- " 74 | \n",
- " 0 | \n",
- " 2016 | \n",
- " 1 | \n",
- " 1 | \n",
- " 1 | \n",
- " 2016-01-01 | \n",
- " 13204 | \n",
- " MCO | \n",
- " 13303 | \n",
- " MIA | \n",
- " 1630 | \n",
- " 0 | \n",
- " 0.0 | \n",
- " 0.0 | \n",
- " 1739 | \n",
- " 0 | \n",
- " 0.0 | \n",
- " 0.0 | \n",
- " 0 | \n",
- " 0 | \n",
- "
\n",
- " \n",
- " | 1 | \n",
- " 87 | \n",
- " 70 | \n",
- " 97 | \n",
- " 1019 | \n",
- " 6 | \n",
- " 302 | \n",
- " 71 | \n",
- " 226 | \n",
- " 7 | \n",
- " 4 | \n",
- " MCO | \n",
- " 2016-01-01 | \n",
- " 3.4 | \n",
- " 71 | \n",
- " 500 | \n",
- " 2016 | \n",
- " 1 | \n",
- " 1 | \n",
- " 1 | \n",
- " 2016-01-01 | \n",
- " 13204 | \n",
- " MCO | \n",
- " 13303 | \n",
- " MIA | \n",
- " 500 | \n",
- " 503 | \n",
- " 3.0 | \n",
- " 0.0 | \n",
- " 603 | \n",
- " 559 | \n",
- " 0.0 | \n",
- " 0.0 | \n",
- " 600 | \n",
- " 500 | \n",
- "
\n",
- " \n",
- " | 2 | \n",
- " 87 | \n",
- " 70 | \n",
- " 97 | \n",
- " 1019 | \n",
- " 6 | \n",
- " 302 | \n",
- " 71 | \n",
- " 226 | \n",
- " 7 | \n",
- " 4 | \n",
- " MCO | \n",
- " 2016-01-01 | \n",
- " 3.4 | \n",
- " 71 | \n",
- " 500 | \n",
- " 2016 | \n",
- " 1 | \n",
- " 1 | \n",
- " 1 | \n",
- " 2016-01-01 | \n",
- " 13204 | \n",
- " MCO | \n",
- " 11618 | \n",
- " EWR | \n",
- " 530 | \n",
- " 527 | \n",
- " 0.0 | \n",
- " 0.0 | \n",
- " 759 | \n",
- " 741 | \n",
- " 0.0 | \n",
- " 0.0 | \n",
- " 800 | \n",
- " 500 | \n",
- "
\n",
- " \n",
- " | 3 | \n",
- " 84 | \n",
- " 71 | \n",
- " 97 | \n",
- " 1019 | \n",
- " 5 | \n",
- " 302 | \n",
- " 71 | \n",
- " 240 | \n",
- " 6 | \n",
- " 3 | \n",
- " MCO | \n",
- " 2016-01-01 | \n",
- " 2.1 | \n",
- " 72 | \n",
- " 600 | \n",
- " 2016 | \n",
- " 1 | \n",
- " 1 | \n",
- " 1 | \n",
- " 2016-01-01 | \n",
- " 13204 | \n",
- " MCO | \n",
- " 11298 | \n",
- " DFW | \n",
- " 600 | \n",
- " 612 | \n",
- " 12.0 | \n",
- " 0.0 | \n",
- " 801 | \n",
- " 800 | \n",
- " 0.0 | \n",
- " 0.0 | \n",
- " 800 | \n",
- " 600 | \n",
- "
\n",
- " \n",
- " | 4 | \n",
- " 84 | \n",
- " 71 | \n",
- " 97 | \n",
- " 1019 | \n",
- " 5 | \n",
- " 302 | \n",
- " 71 | \n",
- " 240 | \n",
- " 6 | \n",
- " 3 | \n",
- " MCO | \n",
- " 2016-01-01 | \n",
- " 2.1 | \n",
- " 72 | \n",
- " 600 | \n",
- " 2016 | \n",
- " 1 | \n",
- " 1 | \n",
- " 1 | \n",
- " 2016-01-01 | \n",
- " 13204 | \n",
- " MCO | \n",
- " 12478 | \n",
- " JFK | \n",
- " 604 | \n",
- " 625 | \n",
- " 21.0 | \n",
- " 1.0 | \n",
- " 830 | \n",
- " 850 | \n",
- " 20.0 | \n",
- " 1.0 | \n",
- " 900 | \n",
- " 600 | \n",
- "
\n",
- " \n",
- "
\n",
- "
"
- ],
- "text/plain": [
- " Cloudcover DewPointF ... nearest_hoursArr nearest_hoursDep\n",
- "0 57 70 ... 0 0\n",
- "1 87 70 ... 600 500\n",
- "2 87 70 ... 800 500\n",
- "3 84 71 ... 800 600\n",
- "4 84 71 ... 900 600\n",
- "\n",
- "[5 rows x 34 columns]"
- ]
- },
- "metadata": {
- "tags": []
- },
- "execution_count": 6
- }
- ]
- },
- {
- "cell_type": "code",
- "metadata": {
- "id": "y-IaZ6c_xOk4",
- "colab_type": "code",
- "colab": {}
- },
- "source": [
- "pf1=df.drop(['nearest_hoursArr', 'nearest_hoursDep'], axis=1)"
- ],
- "execution_count": 0,
- "outputs": []
- },
- {
- "cell_type": "code",
- "metadata": {
- "id": "txHM39lKxOnw",
- "colab_type": "code",
- "outputId": "f2216878-066d-4510-8a12-f2f0ccb133e9",
- "colab": {
- "base_uri": "https://localhost:8080/",
- "height": 305
- }
- },
- "source": [
- "pf1.head()"
- ],
- "execution_count": 0,
- "outputs": [
- {
- "output_type": "execute_result",
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " Cloudcover | \n",
- " DewPointF | \n",
- " Humidity | \n",
- " Pressure | \n",
- " Visibilty | \n",
- " WeatherCode | \n",
- " WindChillF | \n",
- " WindDirDegree | \n",
- " WindGustKmph | \n",
- " WindSpeedKmph | \n",
- " airport | \n",
- " date | \n",
- " precipMM | \n",
- " tempF | \n",
- " time | \n",
- " Year | \n",
- " Quarter | \n",
- " Month | \n",
- " DayofMonth | \n",
- " FlightDate | \n",
- " OriginAirportID | \n",
- " Origin | \n",
- " DestAirportID | \n",
- " Dest | \n",
- " CRSDepTime | \n",
- " DepTime | \n",
- " DepDelayMinutes | \n",
- " DepDel15 | \n",
- " CRSArrTime | \n",
- " ArrTime | \n",
- " ArrDelayMinutes | \n",
- " ArrDel15 | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " | 0 | \n",
- " 57 | \n",
- " 70 | \n",
- " 89 | \n",
- " 1021 | \n",
- " 9 | \n",
- " 176 | \n",
- " 74 | \n",
- " 147 | \n",
- " 10 | \n",
- " 5 | \n",
- " MCO | \n",
- " 2016-01-01 | \n",
- " 2.5 | \n",
- " 74 | \n",
- " 0 | \n",
- " 2016 | \n",
- " 1 | \n",
- " 1 | \n",
- " 1 | \n",
- " 2016-01-01 | \n",
- " 13204 | \n",
- " MCO | \n",
- " 13303 | \n",
- " MIA | \n",
- " 1630 | \n",
- " 0 | \n",
- " 0.0 | \n",
- " 0.0 | \n",
- " 1739 | \n",
- " 0 | \n",
- " 0.0 | \n",
- " 0.0 | \n",
- "
\n",
- " \n",
- " | 1 | \n",
- " 87 | \n",
- " 70 | \n",
- " 97 | \n",
- " 1019 | \n",
- " 6 | \n",
- " 302 | \n",
- " 71 | \n",
- " 226 | \n",
- " 7 | \n",
- " 4 | \n",
- " MCO | \n",
- " 2016-01-01 | \n",
- " 3.4 | \n",
- " 71 | \n",
- " 500 | \n",
- " 2016 | \n",
- " 1 | \n",
- " 1 | \n",
- " 1 | \n",
- " 2016-01-01 | \n",
- " 13204 | \n",
- " MCO | \n",
- " 13303 | \n",
- " MIA | \n",
- " 500 | \n",
- " 503 | \n",
- " 3.0 | \n",
- " 0.0 | \n",
- " 603 | \n",
- " 559 | \n",
- " 0.0 | \n",
- " 0.0 | \n",
- "
\n",
- " \n",
- " | 2 | \n",
- " 87 | \n",
- " 70 | \n",
- " 97 | \n",
- " 1019 | \n",
- " 6 | \n",
- " 302 | \n",
- " 71 | \n",
- " 226 | \n",
- " 7 | \n",
- " 4 | \n",
- " MCO | \n",
- " 2016-01-01 | \n",
- " 3.4 | \n",
- " 71 | \n",
- " 500 | \n",
- " 2016 | \n",
- " 1 | \n",
- " 1 | \n",
- " 1 | \n",
- " 2016-01-01 | \n",
- " 13204 | \n",
- " MCO | \n",
- " 11618 | \n",
- " EWR | \n",
- " 530 | \n",
- " 527 | \n",
- " 0.0 | \n",
- " 0.0 | \n",
- " 759 | \n",
- " 741 | \n",
- " 0.0 | \n",
- " 0.0 | \n",
- "
\n",
- " \n",
- " | 3 | \n",
- " 84 | \n",
- " 71 | \n",
- " 97 | \n",
- " 1019 | \n",
- " 5 | \n",
- " 302 | \n",
- " 71 | \n",
- " 240 | \n",
- " 6 | \n",
- " 3 | \n",
- " MCO | \n",
- " 2016-01-01 | \n",
- " 2.1 | \n",
- " 72 | \n",
- " 600 | \n",
- " 2016 | \n",
- " 1 | \n",
- " 1 | \n",
- " 1 | \n",
- " 2016-01-01 | \n",
- " 13204 | \n",
- " MCO | \n",
- " 11298 | \n",
- " DFW | \n",
- " 600 | \n",
- " 612 | \n",
- " 12.0 | \n",
- " 0.0 | \n",
- " 801 | \n",
- " 800 | \n",
- " 0.0 | \n",
- " 0.0 | \n",
- "
\n",
- " \n",
- " | 4 | \n",
- " 84 | \n",
- " 71 | \n",
- " 97 | \n",
- " 1019 | \n",
- " 5 | \n",
- " 302 | \n",
- " 71 | \n",
- " 240 | \n",
- " 6 | \n",
- " 3 | \n",
- " MCO | \n",
- " 2016-01-01 | \n",
- " 2.1 | \n",
- " 72 | \n",
- " 600 | \n",
- " 2016 | \n",
- " 1 | \n",
- " 1 | \n",
- " 1 | \n",
- " 2016-01-01 | \n",
- " 13204 | \n",
- " MCO | \n",
- " 12478 | \n",
- " JFK | \n",
- " 604 | \n",
- " 625 | \n",
- " 21.0 | \n",
- " 1.0 | \n",
- " 830 | \n",
- " 850 | \n",
- " 20.0 | \n",
- " 1.0 | \n",
- "
\n",
- " \n",
- "
\n",
- "
"
- ],
- "text/plain": [
- " Cloudcover DewPointF Humidity ... ArrTime ArrDelayMinutes ArrDel15\n",
- "0 57 70 89 ... 0 0.0 0.0\n",
- "1 87 70 97 ... 559 0.0 0.0\n",
- "2 87 70 97 ... 741 0.0 0.0\n",
- "3 84 71 97 ... 800 0.0 0.0\n",
- "4 84 71 97 ... 850 20.0 1.0\n",
- "\n",
- "[5 rows x 32 columns]"
- ]
- },
- "metadata": {
- "tags": []
- },
- "execution_count": 8
- }
- ]
- },
- {
- "cell_type": "code",
- "metadata": {
- "id": "stzNOS1TwzG5",
- "colab_type": "code",
- "outputId": "53a62a3e-c98d-4c40-bf4b-d1459af9cd90",
- "colab": {
- "base_uri": "https://localhost:8080/",
- "height": 35
- }
- },
- "source": [
- "pf1.shape"
- ],
- "execution_count": 0,
- "outputs": [
- {
- "output_type": "execute_result",
- "data": {
- "text/plain": [
- "(1438104, 32)"
- ]
- },
- "metadata": {
- "tags": []
- },
- "execution_count": 9
- }
- ]
- },
- {
- "cell_type": "code",
- "metadata": {
- "id": "Vhff2XcxymX3",
- "colab_type": "code",
- "colab": {}
- },
- "source": [
- "need=pf1.drop([\"airport\",\"date\",\"DepTime\",\"ArrTime\",\"DepDelayMinutes\",\"ArrDelayMinutes\",\"ArrDel15\",\"DepDel15\",\"FlightDate\"],axis=1)"
- ],
- "execution_count": 0,
- "outputs": []
- },
- {
- "cell_type": "code",
- "metadata": {
- "id": "UO_eAKHD4y0l",
- "colab_type": "code",
- "colab": {}
- },
- "source": [
- "k = LabelEncoder()\n",
- "c = k.fit_transform(need[\"Dest\"])\n",
- "need[\"Dest\"] = c\n",
- "c = k.fit_transform(need[\"Origin\"])\n",
- "need[\"Origin\"] = c\n",
- "c = k.fit_transform(need[\"WeatherCode\"])\n",
- "need[\"WeatherCode\"] = c\n",
- "f=np.asarray(need)"
- ],
- "execution_count": 0,
- "outputs": []
- },
- {
- "cell_type": "code",
- "metadata": {
- "id": "mFpWvBj6x6lC",
- "colab_type": "code",
- "outputId": "90df5805-cd26-4e28-f606-28b921a681d9",
- "colab": {
- "base_uri": "https://localhost:8080/",
- "height": 35
- }
- },
- "source": [
- "pf1['DepDel15']=pf1.DepDel15.astype(int)\n",
- "l=np.asarray(pf1[\"DepDel15\"])\n",
- "l"
- ],
- "execution_count": 0,
- "outputs": [
- {
- "output_type": "execute_result",
- "data": {
- "text/plain": [
- "array([0, 0, 0, ..., 1, 0, 0])"
- ]
- },
- "metadata": {
- "tags": []
- },
- "execution_count": 8
- }
- ]
- },
- {
- "cell_type": "code",
- "metadata": {
- "id": "MFFaA4VE6cps",
- "colab_type": "code",
- "colab": {}
- },
- "source": [
- "need.to_csv(\"/content/gdrive/My Drive/4.csv\")"
- ],
- "execution_count": 0,
- "outputs": []
- },
- {
- "cell_type": "code",
- "metadata": {
- "id": "7Fd3ODR385nW",
- "colab_type": "code",
- "colab": {}
- },
- "source": [
- "f_train, f_test, l_train, l_test = train_test_split(f, l, test_size=0.20, random_state=42)"
- ],
- "execution_count": 0,
- "outputs": []
- },
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "REiK6VhAJ3U_",
- "colab_type": "text"
- },
- "source": [
- "**EXTRA TREES**"
- ]
- },
- {
- "cell_type": "code",
- "metadata": {
- "id": "bNgm8RKUKsJc",
- "colab_type": "code",
- "outputId": "31a92513-61b5-40bd-ee41-af339cd474af",
- "colab": {
- "base_uri": "https://localhost:8080/",
- "height": 74
- }
- },
- "source": [
- "#before sampling\n",
- "et = ExtraTreesClassifier()\n",
- "et.fit(f_train,l_train)\n"
- ],
- "execution_count": 0,
- "outputs": [
- {
- "output_type": "stream",
- "text": [
- "/usr/local/lib/python3.6/dist-packages/sklearn/ensemble/forest.py:245: FutureWarning: The default value of n_estimators will change from 10 in version 0.20 to 100 in 0.22.\n",
- " \"10 in version 0.20 to 100 in 0.22.\", FutureWarning)\n"
- ],
- "name": "stderr"
- }
- ]
- },
- {
- "cell_type": "code",
- "metadata": {
- "id": "dCZJfFgSMhAa",
- "colab_type": "code",
- "outputId": "d44111d6-ba10-464c-968c-267835fac336",
- "colab": {
- "base_uri": "https://localhost:8080/",
- "height": 244
- }
- },
- "source": [
- "pred = et.predict(f_test)\n",
- "precision = precision_score(l_test, pred, average=\"weighted\")\n",
- "recall = recall_score(l_test, pred, average=\"weighted\")\n",
- "f1 = f1_score(l_test, pred, average=\"weighted\")\n",
- "print (\"Precision:\", precision)\n",
- "print (\"Recall:\", recall)\n",
- "print (\"F1 Score:\", f1)"
- ],
- "execution_count": 0,
- "outputs": [
- {
- "output_type": "error",
- "ename": "NameError",
- "evalue": "ignored",
- "traceback": [
- "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
- "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)",
- "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mpred\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0met\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpredict\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mf_test\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2\u001b[0m \u001b[0mprecision\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mprecision_score\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0ml_test\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mpred\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0maverage\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m\"weighted\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0mrecall\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mrecall_score\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0ml_test\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mpred\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0maverage\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m\"weighted\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0mf1\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mf1_score\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0ml_test\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mpred\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0maverage\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m\"weighted\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0mprint\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0;34m\"Precision:\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mprecision\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
- "\u001b[0;31mNameError\u001b[0m: name 'et' is not defined"
- ]
- }
- ]
- },
- {
- "cell_type": "code",
- "metadata": {
- "id": "51QiB7EMM51I",
- "colab_type": "code",
- "outputId": "204d38a3-8704-4db8-96ee-6a3c3d48b5ee",
- "colab": {
- "base_uri": "https://localhost:8080/",
- "height": 181
- }
- },
- "source": [
- "print(classification_report(l_test, pred))"
- ],
- "execution_count": 0,
- "outputs": [
- {
- "output_type": "stream",
- "text": [
- " precision recall f1-score support\n",
- "\n",
- " 0 0.87 0.95 0.91 228878\n",
- " 1 0.71 0.45 0.55 58743\n",
- "\n",
- " accuracy 0.85 287621\n",
- " macro avg 0.79 0.70 0.73 287621\n",
- "weighted avg 0.84 0.85 0.84 287621\n",
- "\n"
- ],
- "name": "stdout"
- }
- ]
- },
- {
- "cell_type": "code",
- "metadata": {
- "id": "FWhcs0K2NYUh",
- "colab_type": "code",
- "outputId": "848d272e-9fe3-4823-c1f7-05981e399ba1",
- "colab": {
- "base_uri": "https://localhost:8080/",
- "height": 292
- }
- },
- "source": [
- "#oversampled output\n",
- "sos = SMOTE(random_state=42)\n",
- "x, y = sos.fit_resample(f, l)\n",
- "f_train, f_test, l_train, l_test = train_test_split(x, y, test_size=0.20, random_state=42)\n",
- "rg =ExtraTreesClassifier()\n",
- "rg.fit(f_train,l_train)\n",
- "pred = rg.predict(f_test)\n",
- "precision = precision_score(l_test, pred, average=\"weighted\")\n",
- "recall = recall_score(l_test, pred, average=\"weighted\")\n",
- "f1 = f1_score(l_test, pred, average=\"weighted\")\n",
- "print (\"Precision:\", precision)\n",
- "print (\"Recall:\", recall)\n",
- "print (\"F1 Score:\", f1)\n",
- "print(classification_report(l_test, pred))"
- ],
- "execution_count": 0,
- "outputs": [
- {
- "output_type": "stream",
- "text": [
- "/usr/local/lib/python3.6/dist-packages/sklearn/ensemble/forest.py:245: FutureWarning: The default value of n_estimators will change from 10 in version 0.20 to 100 in 0.22.\n",
- " \"10 in version 0.20 to 100 in 0.22.\", FutureWarning)\n"
- ],
- "name": "stderr"
- },
- {
- "output_type": "stream",
- "text": [
- "Precision: 0.9068624599831894\n",
- "Recall: 0.904870251810186\n",
- "F1 Score: 0.9047613202127298\n",
- " precision recall f1-score support\n",
- "\n",
- " 0 0.88 0.94 0.91 228508\n",
- " 1 0.94 0.87 0.90 229456\n",
- "\n",
- " accuracy 0.90 457964\n",
- " macro avg 0.91 0.90 0.90 457964\n",
- "weighted avg 0.91 0.90 0.90 457964\n",
- "\n"
- ],
- "name": "stdout"
- }
- ]
- },
- {
- "cell_type": "code",
- "metadata": {
- "id": "_l-8802v1XcA",
- "colab_type": "code",
- "outputId": "6f1ba080-41ec-4841-88f8-6218ac580653",
- "colab": {
- "base_uri": "https://localhost:8080/",
- "height": 292
- }
- },
- "source": [
- "\n",
- "# Random Undersampling\n",
- "ru = RandomUnderSampler(random_state=42)\n",
- "x, y = ru.fit_resample(f, l)\n",
- "f_train, f_test, l_train, l_test = train_test_split(x, y, test_size=0.20, random_state=42)\n",
- "r =ExtraTreesClassifier()\n",
- "r.fit(f_train,l_train)\n",
- "pred = r.predict(f_test)\n",
- "precision = precision_score(l_test, pred, average=\"weighted\")\n",
- "recall = recall_score(l_test, pred, average=\"weighted\")\n",
- "f1 = f1_score(l_test, pred, average=\"weighted\")\n",
- "print (\"Precision:\", precision)\n",
- "print (\"Recall:\", recall)\n",
- "print (\"F1 Score:\", f1)\n",
- "print(classification_report(l_test, pred))"
- ],
- "execution_count": 0,
- "outputs": [
- {
- "output_type": "stream",
- "text": [
- "/usr/local/lib/python3.6/dist-packages/sklearn/ensemble/forest.py:245: FutureWarning: The default value of n_estimators will change from 10 in version 0.20 to 100 in 0.22.\n",
- " \"10 in version 0.20 to 100 in 0.22.\", FutureWarning)\n"
- ],
- "name": "stderr"
- },
- {
- "output_type": "stream",
- "text": [
- "Precision: 0.7472095814587808\n",
- "Recall: 0.7444448234110405\n",
- "F1 Score: 0.7437519324824997\n",
- " precision recall f1-score support\n",
- "\n",
- " 0 0.72 0.80 0.76 58571\n",
- " 1 0.77 0.69 0.73 58707\n",
- "\n",
- " accuracy 0.74 117278\n",
- " macro avg 0.75 0.74 0.74 117278\n",
- "weighted avg 0.75 0.74 0.74 117278\n",
- "\n"
- ],
- "name": "stdout"
- }
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "n4zyuBjQJgLA",
- "colab_type": "text"
- },
- "source": [
- "**RANDOM FOREST**"
- ]
- },
- {
- "cell_type": "code",
- "metadata": {
- "id": "dOFgV7AT9b7B",
- "colab_type": "code",
- "outputId": "b4f072de-5cd4-4c42-8810-febbaf141067",
- "colab": {
- "base_uri": "https://localhost:8080/",
- "height": 201
- }
- },
- "source": [
- "#before sampling\n",
- "ran = RandomForestClassifier()\n",
- "ran.fit(f_train,l_train)\n"
- ],
- "execution_count": 0,
- "outputs": [
- {
- "output_type": "stream",
- "text": [
- "/usr/local/lib/python3.6/dist-packages/sklearn/ensemble/forest.py:245: FutureWarning: The default value of n_estimators will change from 10 in version 0.20 to 100 in 0.22.\n",
- " \"10 in version 0.20 to 100 in 0.22.\", FutureWarning)\n"
- ],
- "name": "stderr"
- },
- {
- "output_type": "execute_result",
- "data": {
- "text/plain": [
- "RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',\n",
- " max_depth=None, max_features='auto', max_leaf_nodes=None,\n",
- " min_impurity_decrease=0.0, min_impurity_split=None,\n",
- " min_samples_leaf=1, min_samples_split=2,\n",
- " min_weight_fraction_leaf=0.0, n_estimators=10,\n",
- " n_jobs=None, oob_score=False, random_state=None,\n",
- " verbose=0, warm_start=False)"
- ]
- },
- "metadata": {
- "tags": []
- },
- "execution_count": 18
- }
- ]
- },
- {
- "cell_type": "code",
- "metadata": {
- "id": "0b4C4gYb90kF",
- "colab_type": "code",
- "outputId": "f266aa34-dac5-4142-e4d6-c910f13ed814",
- "colab": {
- "base_uri": "https://localhost:8080/",
- "height": 72
- }
- },
- "source": [
- "pred = ran.predict(f_test)\n",
- "precision = precision_score(l_test, pred, average=\"weighted\")\n",
- "recall = recall_score(l_test, pred, average=\"weighted\")\n",
- "f1 = f1_score(l_test, pred, average=\"weighted\")\n",
- "print (\"Precision:\", precision)\n",
- "print (\"Recall:\", recall)\n",
- "print (\"F1 Score:\", f1)"
- ],
- "execution_count": 0,
- "outputs": [
- {
- "output_type": "stream",
- "text": [
- "Precision: 0.7908160255096451\n",
- "Recall: 0.7880079810365115\n",
- "F1 Score: 0.7875114091747006\n"
- ],
- "name": "stdout"
- }
- ]
- },
- {
- "cell_type": "code",
- "metadata": {
- "id": "6lfk_-QPNBSp",
- "colab_type": "code",
- "outputId": "258d7f63-0288-47f1-8381-14cd5542d576",
- "colab": {
- "base_uri": "https://localhost:8080/",
- "height": 292
- }
- },
- "source": [
- "#oversampled output\n",
- "sos = SMOTE(random_state=42)\n",
- "x, y = sos.fit_resample(f, l)\n",
- "f_train, f_test, l_train, l_test = train_test_split(x, y, test_size=0.20, random_state=42)\n",
- "rg = RandomForestClassifier()\n",
- "rg.fit(f_train,l_train)\n",
- "pred = rg.predict(f_test)\n",
- "precision = precision_score(l_test, pred, average=\"weighted\")\n",
- "recall = recall_score(l_test, pred, average=\"weighted\")\n",
- "f1 = f1_score(l_test, pred, average=\"weighted\")\n",
- "print (\"Precision:\", precision)\n",
- "print (\"Recall:\", recall)\n",
- "print (\"F1 Score:\", f1)\n",
- "print(classification_report(l_test, pred))"
- ],
- "execution_count": 0,
- "outputs": [
- {
- "output_type": "stream",
- "text": [
- "/usr/local/lib/python3.6/dist-packages/sklearn/ensemble/forest.py:245: FutureWarning: The default value of n_estimators will change from 10 in version 0.20 to 100 in 0.22.\n",
- " \"10 in version 0.20 to 100 in 0.22.\", FutureWarning)\n"
- ],
- "name": "stderr"
- },
- {
- "output_type": "stream",
- "text": [
- "Precision: 0.9146826332086521\n",
- "Recall: 0.9114886759657964\n",
- "F1 Score: 0.9113267780959348\n",
- " precision recall f1-score support\n",
- "\n",
- " 0 0.88 0.96 0.92 228508\n",
- " 1 0.95 0.87 0.91 229456\n",
- "\n",
- " accuracy 0.91 457964\n",
- " macro avg 0.91 0.91 0.91 457964\n",
- "weighted avg 0.91 0.91 0.91 457964\n",
- "\n"
- ],
- "name": "stdout"
- }
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "IQyqICuh4Rug",
- "colab_type": "text"
- },
- "source": [
- "**LOGISTIC REGRESSION**"
- ]
- },
- {
- "cell_type": "code",
- "metadata": {
- "id": "JAXLCGcg4FPm",
- "colab_type": "code",
- "outputId": "3e574e8a-6b7e-4389-f615-884e7eeae5af",
- "colab": {
- "base_uri": "https://localhost:8080/",
- "height": 165
- }
- },
- "source": [
- "#before sampling\n",
- "lg = LogisticRegression()\n",
- "lg.fit(f_train,l_train)\n"
- ],
- "execution_count": 0,
- "outputs": [
- {
- "output_type": "stream",
- "text": [
- "/usr/local/lib/python3.6/dist-packages/sklearn/linear_model/logistic.py:432: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n",
- " FutureWarning)\n"
- ],
- "name": "stderr"
- },
- {
- "output_type": "execute_result",
- "data": {
- "text/plain": [
- "LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,\n",
- " intercept_scaling=1, l1_ratio=None, max_iter=100,\n",
- " multi_class='warn', n_jobs=None, penalty='l2',\n",
- " random_state=None, solver='warn', tol=0.0001, verbose=0,\n",
- " warm_start=False)"
- ]
- },
- "metadata": {
- "tags": []
- },
- "execution_count": 21
- }
- ]
- },
- {
- "cell_type": "code",
- "metadata": {
- "id": "1SfMUDnJ4ytt",
- "colab_type": "code",
- "outputId": "bfd786cf-17ce-4cd9-f90c-34de53419368",
- "colab": {
- "base_uri": "https://localhost:8080/",
- "height": 72
- }
- },
- "source": [
- "# Weighted precision / recall / F1 of the fitted logistic regression on the held-out split.\n",
- "pred = lg.predict(f_test)\n",
- "precision, recall, f1 = (\n",
- "    scorer(l_test, pred, average=\"weighted\")\n",
- "    for scorer in (precision_score, recall_score, f1_score)\n",
- ")\n",
- "for metric_label, metric_value in ((\"Precision:\", precision), (\"Recall:\", recall), (\"F1 Score:\", f1)):\n",
- "    print(metric_label, metric_value)"
- ],
- "execution_count": 0,
- "outputs": [
- {
- "output_type": "stream",
- "text": [
- "Precision: 0.6153303766290811\n",
- "Recall: 0.6150046728563817\n",
- "F1 Score: 0.6146723066459795\n"
- ],
- "name": "stdout"
- }
- ]
- },
- {
- "cell_type": "code",
- "metadata": {
- "id": "MFpprCUv5IFC",
- "colab_type": "code",
- "outputId": "0b67b00e-edb5-409e-d633-2d42ba5ca8d3",
- "colab": {
- "base_uri": "https://localhost:8080/",
- "height": 292
- }
- },
- "source": [
- "# undersampled output\n",
- "# Resample f/l with RandomUnderSampler, split 80/20, then fit and score logistic regression.\n",
- "rus = RandomUnderSampler(random_state=42)\n",
- "x, y = rus.fit_resample(f, l)\n",
- "f_train, f_test, l_train, l_test = train_test_split(x, y, test_size=0.20, random_state=42)\n",
- "lg = LogisticRegression()\n",
- "lg.fit(f_train,l_train)\n",
- "# Fix: predict with the logistic regression fitted above. This cell used to call\n",
- "# rg.predict, so it reported the random forest's scores. Re-run to refresh the outputs.\n",
- "pred = lg.predict(f_test)\n",
- "precision = precision_score(l_test, pred, average=\"weighted\")\n",
- "recall = recall_score(l_test, pred, average=\"weighted\")\n",
- "f1 = f1_score(l_test, pred, average=\"weighted\")\n",
- "print (\"Precision:\", precision)\n",
- "print (\"Recall:\", recall)\n",
- "print (\"F1 Score:\", f1)\n",
- "print(classification_report(l_test, pred))"
- ],
- "execution_count": 0,
- "outputs": [
- {
- "output_type": "stream",
- "text": [
- "/usr/local/lib/python3.6/dist-packages/sklearn/linear_model/logistic.py:432: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n",
- " FutureWarning)\n"
- ],
- "name": "stderr"
- },
- {
- "output_type": "stream",
- "text": [
- "Precision: 0.934776416365854\n",
- "Recall: 0.9280427701700233\n",
- "F1 Score: 0.9277686765240396\n",
- " precision recall f1-score support\n",
- "\n",
- " 0 0.88 0.99 0.93 58571\n",
- " 1 0.99 0.87 0.92 58707\n",
- "\n",
- " accuracy 0.93 117278\n",
- " macro avg 0.93 0.93 0.93 117278\n",
- "weighted avg 0.93 0.93 0.93 117278\n",
- "\n"
- ],
- "name": "stdout"
- }
- ]
- },
- {
- "cell_type": "code",
- "metadata": {
- "id": "YQMvmoss5mgK",
- "colab_type": "code",
- "outputId": "5f39c5ce-483c-4b18-e448-94e9ac6e5abf",
- "colab": {
- "base_uri": "https://localhost:8080/",
- "height": 292
- }
- },
- "source": [
- "#oversampled output\n",
- "# Resample f/l with SMOTE, split 80/20, then fit and score logistic regression.\n",
- "sos = SMOTE(random_state=42)\n",
- "x, y = sos.fit_resample(f, l)\n",
- "f_train, f_test, l_train, l_test = train_test_split(x, y, test_size=0.20, random_state=42)\n",
- "lg = LogisticRegression()\n",
- "lg.fit(f_train,l_train)\n",
- "# Fix: predict with the logistic regression fitted above. This cell used to call\n",
- "# rg.predict, so it reported the random forest's scores. Re-run to refresh the outputs.\n",
- "pred = lg.predict(f_test)\n",
- "precision = precision_score(l_test, pred, average=\"weighted\")\n",
- "recall = recall_score(l_test, pred, average=\"weighted\")\n",
- "f1 = f1_score(l_test, pred, average=\"weighted\")\n",
- "print (\"Precision:\", precision)\n",
- "print (\"Recall:\", recall)\n",
- "print (\"F1 Score:\", f1)\n",
- "print(classification_report(l_test, pred))"
- ],
- "execution_count": 0,
- "outputs": [
- {
- "output_type": "stream",
- "text": [
- "/usr/local/lib/python3.6/dist-packages/sklearn/linear_model/logistic.py:432: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n",
- " FutureWarning)\n"
- ],
- "name": "stderr"
- },
- {
- "output_type": "stream",
- "text": [
- "Precision: 0.9146826332086521\n",
- "Recall: 0.9114886759657964\n",
- "F1 Score: 0.9113267780959348\n",
- " precision recall f1-score support\n",
- "\n",
- " 0 0.88 0.96 0.92 228508\n",
- " 1 0.95 0.87 0.91 229456\n",
- "\n",
- " accuracy 0.91 457964\n",
- " macro avg 0.91 0.91 0.91 457964\n",
- "weighted avg 0.91 0.91 0.91 457964\n",
- "\n"
- ],
- "name": "stdout"
- }
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "HJdqEtOI5_Dc",
- "colab_type": "text"
- },
- "source": [
- "**DECISION TREE**"
- ]
- },
- {
- "cell_type": "code",
- "metadata": {
- "id": "pBb344bD6X7C",
- "colab_type": "code",
- "outputId": "2fb2420a-a225-410f-a1ff-187e0f12fc9a",
- "colab": {
- "base_uri": "https://localhost:8080/",
- "height": 126
- }
- },
- "source": [
- "#before sampling\n",
- "# Baseline decision tree on the data as-is (no resampling).\n",
- "# Fix: split the raw f/l here. Without this the tree was fitted on the SMOTE-resampled\n",
- "# f_train/l_train left in scope by the preceding oversampling cell.\n",
- "# Stored outputs predate this fix; re-run the cell to refresh them.\n",
- "f_train, f_test, l_train, l_test = train_test_split(f, l, test_size=0.20, random_state=42)\n",
- "dg = tree.DecisionTreeClassifier()\n",
- "dg.fit(f_train,l_train)\n"
- ],
- "execution_count": 0,
- "outputs": [
- {
- "output_type": "execute_result",
- "data": {
- "text/plain": [
- "DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=None,\n",
- " max_features=None, max_leaf_nodes=None,\n",
- " min_impurity_decrease=0.0, min_impurity_split=None,\n",
- " min_samples_leaf=1, min_samples_split=2,\n",
- " min_weight_fraction_leaf=0.0, presort=False,\n",
- " random_state=None, splitter='best')"
- ]
- },
- "metadata": {
- "tags": []
- },
- "execution_count": 25
- }
- ]
- },
- {
- "cell_type": "code",
- "metadata": {
- "id": "6v0jyL876pXi",
- "colab_type": "code",
- "outputId": "0c1a6773-ca62-4079-ebb1-24a35146f07b",
- "colab": {
- "base_uri": "https://localhost:8080/",
- "height": 72
- }
- },
- "source": [
- "# Weighted precision / recall / F1 of the fitted decision tree on the held-out split.\n",
- "pred = dg.predict(f_test)\n",
- "weighted = {\"average\": \"weighted\"}\n",
- "precision = precision_score(l_test, pred, **weighted)\n",
- "recall = recall_score(l_test, pred, **weighted)\n",
- "f1 = f1_score(l_test, pred, **weighted)\n",
- "for metric_label, metric_value in zip((\"Precision:\", \"Recall:\", \"F1 Score:\"), (precision, recall, f1)):\n",
- "    print(metric_label, metric_value)"
- ],
- "execution_count": 0,
- "outputs": [
- {
- "output_type": "stream",
- "text": [
- "Precision: 0.9099632535050411\n",
- "Recall: 0.9099405193421317\n",
- "F1 Score: 0.9099384765292666\n"
- ],
- "name": "stdout"
- }
- ]
- },
- {
- "cell_type": "code",
- "metadata": {
- "id": "yWOMxf4l65o0",
- "colab_type": "code",
- "outputId": "8bfabf59-a1f8-4cc5-9301-6541ad09548a",
- "colab": {
- "base_uri": "https://localhost:8080/",
- "height": 235
- }
- },
- "source": [
- "# Decision tree on undersampled data: resample f/l, split 80/20, fit, report metrics.\n",
- "rus = RandomUnderSampler(random_state=42)\n",
- "x, y = rus.fit_resample(f, l)\n",
- "f_train, f_test, l_train, l_test = train_test_split(x, y, test_size=0.20, random_state=42)\n",
- "dg = tree.DecisionTreeClassifier().fit(f_train, l_train)\n",
- "pred = dg.predict(f_test)\n",
- "precision, recall, f1 = (\n",
- "    scorer(l_test, pred, average=\"weighted\")\n",
- "    for scorer in (precision_score, recall_score, f1_score)\n",
- ")\n",
- "for metric_label, metric_value in ((\"Precision:\", precision), (\"Recall:\", recall), (\"F1 Score:\", f1)):\n",
- "    print(metric_label, metric_value)\n",
- "print(classification_report(l_test, pred))"
- ],
- "execution_count": 0,
- "outputs": [
- {
- "output_type": "stream",
- "text": [
- "Precision: 0.7962277290798526\n",
- "Recall: 0.7962277665035216\n",
- "F1 Score: 0.796227680870621\n",
- " precision recall f1-score support\n",
- "\n",
- " 0 0.80 0.80 0.80 58571\n",
- " 1 0.80 0.80 0.80 58707\n",
- "\n",
- " accuracy 0.80 117278\n",
- " macro avg 0.80 0.80 0.80 117278\n",
- "weighted avg 0.80 0.80 0.80 117278\n",
- "\n"
- ],
- "name": "stdout"
- }
- ]
- },
- {
- "cell_type": "code",
- "metadata": {
- "id": "Z5mZjeS77RbE",
- "colab_type": "code",
- "outputId": "1fa6778c-c412-4c0e-ccbf-b85f4d2b9f1d",
- "colab": {
- "base_uri": "https://localhost:8080/",
- "height": 235
- }
- },
- "source": [
- "# Decision tree on SMOTE-oversampled data: resample f/l, split 80/20, fit, report metrics.\n",
- "sos = SMOTE(random_state=42)\n",
- "x, y = sos.fit_resample(f, l)\n",
- "f_train, f_test, l_train, l_test = train_test_split(x, y, test_size=0.20, random_state=42)\n",
- "dg = tree.DecisionTreeClassifier().fit(f_train, l_train)\n",
- "pred = dg.predict(f_test)\n",
- "weighted = {\"average\": \"weighted\"}\n",
- "precision = precision_score(l_test, pred, **weighted)\n",
- "recall = recall_score(l_test, pred, **weighted)\n",
- "f1 = f1_score(l_test, pred, **weighted)\n",
- "for metric_label, metric_value in zip((\"Precision:\", \"Recall:\", \"F1 Score:\"), (precision, recall, f1)):\n",
- "    print(metric_label, metric_value)\n",
- "print(classification_report(l_test, pred))"
- ],
- "execution_count": 0,
- "outputs": [
- {
- "output_type": "stream",
- "text": [
- "Precision: 0.9095492394117647\n",
- "Recall: 0.909527823147671\n",
- "F1 Score: 0.9095258643003094\n",
- " precision recall f1-score support\n",
- "\n",
- " 0 0.91 0.91 0.91 228508\n",
- " 1 0.91 0.91 0.91 229456\n",
- "\n",
- " accuracy 0.91 457964\n",
- " macro avg 0.91 0.91 0.91 457964\n",
- "weighted avg 0.91 0.91 0.91 457964\n",
- "\n"
- ],
- "name": "stdout"
- }
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "BXF3EMUD9ix_",
- "colab_type": "text"
- },
- "source": [
- "**GRADIENT BOOSTING**"
- ]
- },
- {
- "cell_type": "code",
- "metadata": {
- "id": "zVCw5IDa_IU3",
- "colab_type": "code",
- "colab": {}
- },
- "source": [
- "#before sampling\n",
- "# Baseline gradient boosting on the data as-is (no resampling).\n",
- "# Fix: split the raw f/l here. Without this the model would be fitted on the\n",
- "# SMOTE-resampled f_train/l_train left in scope by the preceding oversampling cell.\n",
- "f_train, f_test, l_train, l_test = train_test_split(f, l, test_size=0.20, random_state=42)\n",
- "gb = GradientBoostingClassifier()\n",
- "gb.fit(f_train,l_train)\n"
- ],
- "execution_count": 0,
- "outputs": []
- },
- {
- "cell_type": "code",
- "metadata": {
- "id": "AjsubeaO_khM",
- "colab_type": "code",
- "colab": {}
- },
- "source": [
- "# Weighted precision / recall / F1 of the fitted gradient boosting model on the held-out split.\n",
- "pred = gb.predict(f_test)\n",
- "precision, recall, f1 = [\n",
- "    metric(l_test, pred, average=\"weighted\")\n",
- "    for metric in (precision_score, recall_score, f1_score)\n",
- "]\n",
- "for metric_label, metric_value in ((\"Precision:\", precision), (\"Recall:\", recall), (\"F1 Score:\", f1)):\n",
- "    print(metric_label, metric_value)"
- ],
- "execution_count": 0,
- "outputs": []
- },
- {
- "cell_type": "code",
- "metadata": {
- "id": "nXRgEHI7_nIi",
- "colab_type": "code",
- "colab": {}
- },
- "source": [
- "# Gradient boosting on undersampled data: resample f/l, split 80/20, fit, report metrics.\n",
- "rus = RandomUnderSampler(random_state=42)\n",
- "x, y = rus.fit_resample(f, l)\n",
- "f_train, f_test, l_train, l_test = train_test_split(x, y, test_size=0.20, random_state=42)\n",
- "gb = GradientBoostingClassifier().fit(f_train, l_train)\n",
- "pred = gb.predict(f_test)\n",
- "precision, recall, f1 = [\n",
- "    metric(l_test, pred, average=\"weighted\")\n",
- "    for metric in (precision_score, recall_score, f1_score)\n",
- "]\n",
- "for metric_label, metric_value in zip((\"Precision:\", \"Recall:\", \"F1 Score:\"), (precision, recall, f1)):\n",
- "    print(metric_label, metric_value)\n",
- "print(classification_report(l_test, pred))"
- ],
- "execution_count": 0,
- "outputs": []
- },
- {
- "cell_type": "code",
- "metadata": {
- "id": "3XVZ5-7D_1uC",
- "colab_type": "code",
- "colab": {}
- },
- "source": [
- "# Gradient boosting on SMOTE-oversampled data: resample f/l, split 80/20, fit, report metrics.\n",
- "sos = SMOTE(random_state=42)\n",
- "x, y = sos.fit_resample(f, l)\n",
- "f_train, f_test, l_train, l_test = train_test_split(x, y, test_size=0.20, random_state=42)\n",
- "gb = GradientBoostingClassifier().fit(f_train, l_train)\n",
- "pred = gb.predict(f_test)\n",
- "weighted = {\"average\": \"weighted\"}\n",
- "precision = precision_score(l_test, pred, **weighted)\n",
- "recall = recall_score(l_test, pred, **weighted)\n",
- "f1 = f1_score(l_test, pred, **weighted)\n",
- "for metric_label, metric_value in ((\"Precision:\", precision), (\"Recall:\", recall), (\"F1 Score:\", f1)):\n",
- "    print(metric_label, metric_value)\n",
- "print(classification_report(l_test, pred))"
- ],
- "execution_count": 0,
- "outputs": []
- }
- ]
-}
\ No newline at end of file