From fc13ed68c23976d7f92dd0c64dc080b7371852e6 Mon Sep 17 00:00:00 2001 From: pmannil <45764461+pmannil@users.noreply.github.com> Date: Tue, 23 Jul 2019 22:34:30 +0530 Subject: [PATCH 1/2] Created using Colaboratory --- module2.ipynb | 1637 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 1637 insertions(+) create mode 100644 module2.ipynb diff --git a/module2.ipynb b/module2.ipynb new file mode 100644 index 00000000..573e3159 --- /dev/null +++ b/module2.ipynb @@ -0,0 +1,1637 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "name": "module2.ipynb", + "version": "0.3.2", + "provenance": [], + "collapsed_sections": [], + "include_colab_link": true + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + } + }, + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "view-in-github", + "colab_type": "text" + }, + "source": [ + "\"Open" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "qmQOF7LIqSeI", + "colab_type": "code", + "outputId": "54ebb43f-1d54-4855-853f-178c3cb432dc", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 35 + } + }, + "source": [ + "from google.colab import drive\n", + "drive.mount('/content/gdrive')" + ], + "execution_count": 0, + "outputs": [ + { + "output_type": "stream", + "text": [ + "Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount(\"/content/gdrive\", force_remount=True).\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "yvVHvIcFqW6i", + "colab_type": "code", + "outputId": "38033657-1d6a-4999-c6eb-f48cab2dd14f", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 74 + } + }, + "source": [ + "import pandas as pd\n", + "import numpy as np\n", + "import os\n", + "from sklearn.model_selection import train_test_split\n", + "from sklearn.ensemble import RandomForestClassifier\n", + "from sklearn.linear_model import LogisticRegression\n", + "from sklearn.ensemble import ExtraTreesClassifier\n", + "from sklearn import tree\n", + "from sklearn import svm\n", + "from sklearn.ensemble import GradientBoostingClassifier\n", + "from sklearn.metrics import accuracy_score\n", + "from sklearn.preprocessing import LabelEncoder\n", + "from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, classification_report\n", + "from imblearn.over_sampling import SMOTE\n", + "from imblearn.under_sampling import RandomUnderSampler\n", + "from google.colab import files" + ], + "execution_count": 0, + "outputs": [ + { + "output_type": "stream", + "text": [ + "/usr/local/lib/python3.6/dist-packages/sklearn/externals/six.py:31: DeprecationWarning: The module is deprecated in version 0.21 and will be removed in version 0.23 since we've dropped support for Python 2.7. Please rely on the official version of six (https://pypi.org/project/six/).\n", + " \"(https://pypi.org/project/six/).\", DeprecationWarning)\n" + ], + "name": "stderr" + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "tPr-qj5cqj_T", + "colab_type": "code", + "colab": {} + }, + "source": [ + "df=pd.read_csv('/content/gdrive/My Drive/1.csv')" + ], + "execution_count": 0, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "dTvIVJcWw6Nf", + "colab_type": "code", + "outputId": "24eb7918-16a0-4116-9076-aefa570b6bce", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 305 + } + }, + "source": [ + "df.head()" + ], + "execution_count": 0, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
CloudcoverDewPointFHumidityPressureVisibiltyWeatherCodeWindChillFWindDirDegreeWindGustKmphWindSpeedKmphairportdateprecipMMtempFtimeYearQuarterMonthDayofMonthFlightDateOriginAirportIDOriginDestAirportIDDestCRSDepTimeDepTimeDepDelayMinutesDepDel15CRSArrTimeArrTimeArrDelayMinutesArrDel15nearest_hoursArrnearest_hoursDep
05770891021917674147105MCO2016-01-012.574020161112016-01-0113204MCO13303MIA163000.00.0173900.00.000
1877097101963027122674MCO2016-01-013.47150020161112016-01-0113204MCO13303MIA5005033.00.06035590.00.0600500
2877097101963027122674MCO2016-01-013.47150020161112016-01-0113204MCO11618EWR5305270.00.07597410.00.0800500
3847197101953027124063MCO2016-01-012.17260020161112016-01-0113204MCO11298DFW60061212.00.08018000.00.0800600
4847197101953027124063MCO2016-01-012.17260020161112016-01-0113204MCO12478JFK60462521.01.083085020.01.0900600
\n", + "
" + ], + "text/plain": [ + " Cloudcover DewPointF ... nearest_hoursArr nearest_hoursDep\n", + "0 57 70 ... 0 0\n", + "1 87 70 ... 600 500\n", + "2 87 70 ... 800 500\n", + "3 84 71 ... 800 600\n", + "4 84 71 ... 900 600\n", + "\n", + "[5 rows x 34 columns]" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 6 + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "y-IaZ6c_xOk4", + "colab_type": "code", + "colab": {} + }, + "source": [ + "pf1=df.drop(['nearest_hoursArr', 'nearest_hoursDep'], axis=1)" + ], + "execution_count": 0, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "txHM39lKxOnw", + "colab_type": "code", + "outputId": "f2216878-066d-4510-8a12-f2f0ccb133e9", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 305 + } + }, + "source": [ + "pf1.head()" + ], + "execution_count": 0, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
CloudcoverDewPointFHumidityPressureVisibiltyWeatherCodeWindChillFWindDirDegreeWindGustKmphWindSpeedKmphairportdateprecipMMtempFtimeYearQuarterMonthDayofMonthFlightDateOriginAirportIDOriginDestAirportIDDestCRSDepTimeDepTimeDepDelayMinutesDepDel15CRSArrTimeArrTimeArrDelayMinutesArrDel15
05770891021917674147105MCO2016-01-012.574020161112016-01-0113204MCO13303MIA163000.00.0173900.00.0
1877097101963027122674MCO2016-01-013.47150020161112016-01-0113204MCO13303MIA5005033.00.06035590.00.0
2877097101963027122674MCO2016-01-013.47150020161112016-01-0113204MCO11618EWR5305270.00.07597410.00.0
3847197101953027124063MCO2016-01-012.17260020161112016-01-0113204MCO11298DFW60061212.00.08018000.00.0
4847197101953027124063MCO2016-01-012.17260020161112016-01-0113204MCO12478JFK60462521.01.083085020.01.0
\n", + "
" + ], + "text/plain": [ + " Cloudcover DewPointF Humidity ... ArrTime ArrDelayMinutes ArrDel15\n", + "0 57 70 89 ... 0 0.0 0.0\n", + "1 87 70 97 ... 559 0.0 0.0\n", + "2 87 70 97 ... 741 0.0 0.0\n", + "3 84 71 97 ... 800 0.0 0.0\n", + "4 84 71 97 ... 850 20.0 1.0\n", + "\n", + "[5 rows x 32 columns]" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 8 + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "stzNOS1TwzG5", + "colab_type": "code", + "outputId": "53a62a3e-c98d-4c40-bf4b-d1459af9cd90", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 35 + } + }, + "source": [ + "pf1.shape" + ], + "execution_count": 0, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "(1438104, 32)" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 9 + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "Vhff2XcxymX3", + "colab_type": "code", + "colab": {} + }, + "source": [ + "need=pf1.drop([\"airport\",\"date\",\"DepTime\",\"ArrTime\",\"DepDelayMinutes\",\"ArrDelayMinutes\",\"ArrDel15\",\"DepDel15\",\"FlightDate\"],axis=1)" + ], + "execution_count": 0, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "UO_eAKHD4y0l", + "colab_type": "code", + "colab": {} + }, + "source": [ + "k = LabelEncoder()\n", + "c = k.fit_transform(need[\"Dest\"])\n", + "need[\"Dest\"] = c\n", + "c = k.fit_transform(need[\"Origin\"])\n", + "need[\"Origin\"] = c\n", + "c = k.fit_transform(need[\"WeatherCode\"])\n", + "need[\"WeatherCode\"] = c\n", + "f=np.asarray(need)" + ], + "execution_count": 0, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "mFpWvBj6x6lC", + "colab_type": "code", + "outputId": "90df5805-cd26-4e28-f606-28b921a681d9", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 35 + } + }, + "source": [ + "pf1['DepDel15']=pf1.DepDel15.astype(int)\n", + "l=np.asarray(pf1[\"DepDel15\"])\n", + "l" + ], + "execution_count": 0, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "array([0, 0, 0, ..., 1, 0, 0])" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 8 + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "MFFaA4VE6cps", + "colab_type": "code", + "colab": {} + }, + "source": [ + "need.to_csv(\"/content/gdrive/My Drive/4.csv\")" + ], + "execution_count": 0, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "7Fd3ODR385nW", + "colab_type": "code", + "colab": {} + }, + "source": [ + "f_train, f_test, l_train, l_test = train_test_split(f, l, test_size=0.20, random_state=42)" + ], + "execution_count": 0, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "REiK6VhAJ3U_", + "colab_type": "text" + }, + "source": [ + "**EXTRA TREES**" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "bNgm8RKUKsJc", + "colab_type": "code", + "outputId": "31a92513-61b5-40bd-ee41-af339cd474af", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 74 + } + }, + "source": [ + "#before sampling\n", + "et = ExtraTreesClassifier()\n", + "et.fit(f_train,l_train)\n" + ], + "execution_count": 0, + "outputs": [ + { + "output_type": "stream", + "text": [ + "/usr/local/lib/python3.6/dist-packages/sklearn/ensemble/forest.py:245: FutureWarning: The default value of n_estimators will change from 10 in version 0.20 to 100 in 0.22.\n", + " \"10 in version 0.20 to 100 in 0.22.\", FutureWarning)\n" + ], + "name": "stderr" + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "dCZJfFgSMhAa", + "colab_type": "code", + "outputId": "d44111d6-ba10-464c-968c-267835fac336", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 244 + } + }, + "source": [ + "pred = et.predict(f_test)\n", + "precision = precision_score(l_test, pred, average=\"weighted\")\n", + "recall = recall_score(l_test, pred, average=\"weighted\")\n", + "f1 = f1_score(l_test, pred, average=\"weighted\")\n", + "print (\"Precision:\", precision)\n", + "print (\"Recall:\", recall)\n", + "print (\"F1 Score:\", f1)" + ], + "execution_count": 0, + "outputs": [ + { + "output_type": "error", + "ename": "NameError", + "evalue": "ignored", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mpred\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0met\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpredict\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mf_test\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2\u001b[0m \u001b[0mprecision\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mprecision_score\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0ml_test\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mpred\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0maverage\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m\"weighted\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0mrecall\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mrecall_score\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0ml_test\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mpred\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0maverage\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m\"weighted\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0mf1\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mf1_score\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0ml_test\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mpred\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0maverage\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m\"weighted\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0mprint\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0;34m\"Precision:\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mprecision\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mNameError\u001b[0m: name 'et' is not defined" + ] + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "51QiB7EMM51I", + "colab_type": "code", + "outputId": "204d38a3-8704-4db8-96ee-6a3c3d48b5ee", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 181 + } + }, + "source": [ + "print(classification_report(l_test, pred))" + ], + "execution_count": 0, + "outputs": [ + { + "output_type": "stream", + "text": [ + " precision recall f1-score support\n", + "\n", + " 0 0.87 0.95 0.91 228878\n", + " 1 0.71 0.45 0.55 58743\n", + "\n", + " accuracy 0.85 287621\n", + " macro avg 0.79 0.70 0.73 287621\n", + "weighted avg 0.84 0.85 0.84 287621\n", + "\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "FWhcs0K2NYUh", + "colab_type": "code", + "outputId": "848d272e-9fe3-4823-c1f7-05981e399ba1", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 292 + } + }, + "source": [ + "#oversampled output\n", + "sos = SMOTE(random_state=42)\n", + "x, y = sos.fit_resample(f, l)\n", + "f_train, f_test, l_train, l_test = train_test_split(x, y, test_size=0.20, random_state=42)\n", + "rg =ExtraTreesClassifier()\n", + "rg.fit(f_train,l_train)\n", + "pred = rg.predict(f_test)\n", + "precision = precision_score(l_test, pred, average=\"weighted\")\n", + "recall = recall_score(l_test, pred, average=\"weighted\")\n", + "f1 = f1_score(l_test, pred, average=\"weighted\")\n", + "print (\"Precision:\", precision)\n", + "print (\"Recall:\", recall)\n", + "print (\"F1 Score:\", f1)\n", + "print(classification_report(l_test, pred))" + ], + "execution_count": 0, + "outputs": [ + { + "output_type": "stream", + "text": [ + "/usr/local/lib/python3.6/dist-packages/sklearn/ensemble/forest.py:245: FutureWarning: The default value of n_estimators will change from 10 in version 0.20 to 100 in 0.22.\n", + " \"10 in version 0.20 to 100 in 0.22.\", FutureWarning)\n" + ], + "name": "stderr" + }, + { + "output_type": "stream", + "text": [ + "Precision: 0.9068624599831894\n", + "Recall: 0.904870251810186\n", + "F1 Score: 0.9047613202127298\n", + " precision recall f1-score support\n", + "\n", + " 0 0.88 0.94 0.91 228508\n", + " 1 0.94 0.87 0.90 229456\n", + "\n", + " accuracy 0.90 457964\n", + " macro avg 0.91 0.90 0.90 457964\n", + "weighted avg 0.91 0.90 0.90 457964\n", + "\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "_l-8802v1XcA", + "colab_type": "code", + "outputId": "6f1ba080-41ec-4841-88f8-6218ac580653", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 292 + } + }, + "source": [ + "\n", + "# Random Undersampling\n", + "ru = RandomUnderSampler(random_state=42)\n", + "x, y = ru.fit_resample(f, l)\n", + "f_train, f_test, l_train, l_test = train_test_split(x, y, test_size=0.20, random_state=42)\n", + "r =ExtraTreesClassifier()\n", + "r.fit(f_train,l_train)\n", + "pred = r.predict(f_test)\n", + "precision = precision_score(l_test, pred, average=\"weighted\")\n", + "recall = recall_score(l_test, pred, average=\"weighted\")\n", + "f1 = f1_score(l_test, pred, average=\"weighted\")\n", + "print (\"Precision:\", precision)\n", + "print (\"Recall:\", recall)\n", + "print (\"F1 Score:\", f1)\n", + "print(classification_report(l_test, pred))" + ], + "execution_count": 0, + "outputs": [ + { + "output_type": "stream", + "text": [ + "/usr/local/lib/python3.6/dist-packages/sklearn/ensemble/forest.py:245: FutureWarning: The default value of n_estimators will change from 10 in version 0.20 to 100 in 0.22.\n", + " \"10 in version 0.20 to 100 in 0.22.\", FutureWarning)\n" + ], + "name": "stderr" + }, + { + "output_type": "stream", + "text": [ + "Precision: 0.7472095814587808\n", + "Recall: 0.7444448234110405\n", + "F1 Score: 0.7437519324824997\n", + " precision recall f1-score support\n", + "\n", + " 0 0.72 0.80 0.76 58571\n", + " 1 0.77 0.69 0.73 58707\n", + "\n", + " accuracy 0.74 117278\n", + " macro avg 0.75 0.74 0.74 117278\n", + "weighted avg 0.75 0.74 0.74 117278\n", + "\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "n4zyuBjQJgLA", + "colab_type": "text" + }, + "source": [ + "**RANDOM FOREST**" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "dOFgV7AT9b7B", + "colab_type": "code", + "outputId": "b4f072de-5cd4-4c42-8810-febbaf141067", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 201 + } + }, + "source": [ + "#before sampling\n", + "ran = RandomForestClassifier()\n", + "ran.fit(f_train,l_train)\n" + ], + "execution_count": 0, + "outputs": [ + { + "output_type": "stream", + "text": [ + "/usr/local/lib/python3.6/dist-packages/sklearn/ensemble/forest.py:245: FutureWarning: The default value of n_estimators will change from 10 in version 0.20 to 100 in 0.22.\n", + " \"10 in version 0.20 to 100 in 0.22.\", FutureWarning)\n" + ], + "name": "stderr" + }, + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',\n", + " max_depth=None, max_features='auto', max_leaf_nodes=None,\n", + " min_impurity_decrease=0.0, min_impurity_split=None,\n", + " min_samples_leaf=1, min_samples_split=2,\n", + " min_weight_fraction_leaf=0.0, n_estimators=10,\n", + " n_jobs=None, oob_score=False, random_state=None,\n", + " verbose=0, warm_start=False)" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 18 + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "0b4C4gYb90kF", + "colab_type": "code", + "outputId": "f266aa34-dac5-4142-e4d6-c910f13ed814", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 72 + } + }, + "source": [ + "pred = ran.predict(f_test)\n", + "precision = precision_score(l_test, pred, average=\"weighted\")\n", + "recall = recall_score(l_test, pred, average=\"weighted\")\n", + "f1 = f1_score(l_test, pred, average=\"weighted\")\n", + "print (\"Precision:\", precision)\n", + "print (\"Recall:\", recall)\n", + "print (\"F1 Score:\", f1)" + ], + "execution_count": 0, + "outputs": [ + { + "output_type": "stream", + "text": [ + "Precision: 0.7908160255096451\n", + "Recall: 0.7880079810365115\n", + "F1 Score: 0.7875114091747006\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "6lfk_-QPNBSp", + "colab_type": "code", + "outputId": "258d7f63-0288-47f1-8381-14cd5542d576", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 292 + } + }, + "source": [ + "#oversampled output\n", + "sos = SMOTE(random_state=42)\n", + "x, y = sos.fit_resample(f, l)\n", + "f_train, f_test, l_train, l_test = train_test_split(x, y, test_size=0.20, random_state=42)\n", + "rg = RandomForestClassifier()\n", + "rg.fit(f_train,l_train)\n", + "pred = rg.predict(f_test)\n", + "precision = precision_score(l_test, pred, average=\"weighted\")\n", + "recall = recall_score(l_test, pred, average=\"weighted\")\n", + "f1 = f1_score(l_test, pred, average=\"weighted\")\n", + "print (\"Precision:\", precision)\n", + "print (\"Recall:\", recall)\n", + "print (\"F1 Score:\", f1)\n", + "print(classification_report(l_test, pred))" + ], + "execution_count": 0, + "outputs": [ + { + "output_type": "stream", + "text": [ + "/usr/local/lib/python3.6/dist-packages/sklearn/ensemble/forest.py:245: FutureWarning: The default value of n_estimators will change from 10 in version 0.20 to 100 in 0.22.\n", + " \"10 in version 0.20 to 100 in 0.22.\", FutureWarning)\n" + ], + "name": "stderr" + }, + { + "output_type": "stream", + "text": [ + "Precision: 0.9146826332086521\n", + "Recall: 0.9114886759657964\n", + "F1 Score: 0.9113267780959348\n", + " precision recall f1-score support\n", + "\n", + " 0 0.88 0.96 0.92 228508\n", + " 1 0.95 0.87 0.91 229456\n", + "\n", + " accuracy 0.91 457964\n", + " macro avg 0.91 0.91 0.91 457964\n", + "weighted avg 0.91 0.91 0.91 457964\n", + "\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "IQyqICuh4Rug", + "colab_type": "text" + }, + "source": [ + "**LOGISTIC REGRESSION**" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "JAXLCGcg4FPm", + "colab_type": "code", + "outputId": "3e574e8a-6b7e-4389-f615-884e7eeae5af", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 165 + } + }, + "source": [ + "#before sampling\n", + "lg = LogisticRegression()\n", + "lg.fit(f_train,l_train)\n" + ], + "execution_count": 0, + "outputs": [ + { + "output_type": "stream", + "text": [ + "/usr/local/lib/python3.6/dist-packages/sklearn/linear_model/logistic.py:432: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n", + " FutureWarning)\n" + ], + "name": "stderr" + }, + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,\n", + " intercept_scaling=1, l1_ratio=None, max_iter=100,\n", + " multi_class='warn', n_jobs=None, penalty='l2',\n", + " random_state=None, solver='warn', tol=0.0001, verbose=0,\n", + " warm_start=False)" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 21 + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "1SfMUDnJ4ytt", + "colab_type": "code", + "outputId": "bfd786cf-17ce-4cd9-f90c-34de53419368", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 72 + } + }, + "source": [ + "pred = lg.predict(f_test)\n", + "precision = precision_score(l_test, pred, average=\"weighted\")\n", + "recall = recall_score(l_test, pred, average=\"weighted\")\n", + "f1 = f1_score(l_test, pred, average=\"weighted\")\n", + "print (\"Precision:\", precision)\n", + "print (\"Recall:\", recall)\n", + "print (\"F1 Score:\", f1)" + ], + "execution_count": 0, + "outputs": [ + { + "output_type": "stream", + "text": [ + "Precision: 0.6153303766290811\n", + "Recall: 0.6150046728563817\n", + "F1 Score: 0.6146723066459795\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "MFpprCUv5IFC", + "colab_type": "code", + "outputId": "0b67b00e-edb5-409e-d633-2d42ba5ca8d3", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 292 + } + }, + "source": [ + "# undersampled output\n", + "rus = RandomUnderSampler(random_state=42)\n", + "x, y = rus.fit_resample(f, l)\n", + "f_train, f_test, l_train, l_test = train_test_split(x, y, test_size=0.20, random_state=42)\n", + "lg = LogisticRegression()\n", + "lg.fit(f_train,l_train)\n", + "pred = rg.predict(f_test)\n", + "precision = precision_score(l_test, pred, average=\"weighted\")\n", + "recall = recall_score(l_test, pred, average=\"weighted\")\n", + "f1 = f1_score(l_test, pred, average=\"weighted\")\n", + "print (\"Precision:\", precision)\n", + "print (\"Recall:\", recall)\n", + "print (\"F1 Score:\", f1)\n", + "print(classification_report(l_test, pred))" + ], + "execution_count": 0, + "outputs": [ + { + "output_type": "stream", + "text": [ + "/usr/local/lib/python3.6/dist-packages/sklearn/linear_model/logistic.py:432: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n", + " FutureWarning)\n" + ], + "name": "stderr" + }, + { + "output_type": "stream", + "text": [ + "Precision: 0.934776416365854\n", + "Recall: 0.9280427701700233\n", + "F1 Score: 0.9277686765240396\n", + " precision recall f1-score support\n", + "\n", + " 0 0.88 0.99 0.93 58571\n", + " 1 0.99 0.87 0.92 58707\n", + "\n", + " accuracy 0.93 117278\n", + " macro avg 0.93 0.93 0.93 117278\n", + "weighted avg 0.93 0.93 0.93 117278\n", + "\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "YQMvmoss5mgK", + "colab_type": "code", + "outputId": "5f39c5ce-483c-4b18-e448-94e9ac6e5abf", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 292 + } + }, + "source": [ + "#oversampled output\n", + "sos = SMOTE(random_state=42)\n", + "x, y = sos.fit_resample(f, l)\n", + "f_train, f_test, l_train, l_test = train_test_split(x, y, test_size=0.20, random_state=42)\n", + "lg = LogisticRegression()\n", + "lg.fit(f_train,l_train)\n", + "pred = rg.predict(f_test)\n", + "precision = precision_score(l_test, pred, average=\"weighted\")\n", + "recall = recall_score(l_test, pred, average=\"weighted\")\n", + "f1 = f1_score(l_test, pred, average=\"weighted\")\n", + "print (\"Precision:\", precision)\n", + "print (\"Recall:\", recall)\n", + "print (\"F1 Score:\", f1)\n", + "print(classification_report(l_test, pred))" + ], + "execution_count": 0, + "outputs": [ + { + "output_type": "stream", + "text": [ + "/usr/local/lib/python3.6/dist-packages/sklearn/linear_model/logistic.py:432: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n", + " FutureWarning)\n" + ], + "name": "stderr" + }, + { + "output_type": "stream", + "text": [ + "Precision: 0.9146826332086521\n", + "Recall: 0.9114886759657964\n", + "F1 Score: 0.9113267780959348\n", + " precision recall f1-score support\n", + "\n", + " 0 0.88 0.96 0.92 228508\n", + " 1 0.95 0.87 0.91 229456\n", + "\n", + " accuracy 0.91 457964\n", + " macro avg 0.91 0.91 0.91 457964\n", + "weighted avg 0.91 0.91 0.91 457964\n", + "\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "HJdqEtOI5_Dc", + "colab_type": "text" + }, + "source": [ + "**DECISION TREE**" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "pBb344bD6X7C", + "colab_type": "code", + "outputId": "2fb2420a-a225-410f-a1ff-187e0f12fc9a", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 126 + } + }, + "source": [ + "#before sampling\n", + "dg = tree.DecisionTreeClassifier()\n", + "dg.fit(f_train,l_train)\n" + ], + "execution_count": 0, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=None,\n", + " max_features=None, max_leaf_nodes=None,\n", + " min_impurity_decrease=0.0, min_impurity_split=None,\n", + " min_samples_leaf=1, min_samples_split=2,\n", + " min_weight_fraction_leaf=0.0, presort=False,\n", + " random_state=None, splitter='best')" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 25 + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "6v0jyL876pXi", + "colab_type": "code", + "outputId": "0c1a6773-ca62-4079-ebb1-24a35146f07b", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 72 + } + }, + "source": [ + "pred = dg.predict(f_test)\n", + "precision = precision_score(l_test, pred, average=\"weighted\")\n", + "recall = recall_score(l_test, pred, average=\"weighted\")\n", + "f1 = f1_score(l_test, pred, average=\"weighted\")\n", + "print (\"Precision:\", precision)\n", + "print (\"Recall:\", recall)\n", + "print (\"F1 Score:\", f1)" + ], + "execution_count": 0, + "outputs": [ + { + "output_type": "stream", + "text": [ + "Precision: 0.9099632535050411\n", + "Recall: 0.9099405193421317\n", + "F1 Score: 0.9099384765292666\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "yWOMxf4l65o0", + "colab_type": "code", + "outputId": "8bfabf59-a1f8-4cc5-9301-6541ad09548a", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 235 + } + }, + "source": [ + "# undersampled output\n", + "rus = RandomUnderSampler(random_state=42)\n", + "x, y = rus.fit_resample(f, l)\n", + "f_train, f_test, l_train, l_test = train_test_split(x, y, test_size=0.20, random_state=42)\n", + "dg=tree.DecisionTreeClassifier()\n", + "dg.fit(f_train,l_train)\n", + "pred = dg.predict(f_test)\n", + "precision = precision_score(l_test, pred, average=\"weighted\")\n", + "recall = recall_score(l_test, pred, average=\"weighted\")\n", + "f1 = f1_score(l_test, pred, average=\"weighted\")\n", + "print (\"Precision:\", precision)\n", + "print (\"Recall:\", recall)\n", + "print (\"F1 Score:\", f1)\n", + "print(classification_report(l_test, pred))" + ], + "execution_count": 0, + "outputs": [ + { + "output_type": "stream", + "text": [ + "Precision: 0.7962277290798526\n", + "Recall: 0.7962277665035216\n", + "F1 Score: 0.796227680870621\n", + " precision recall f1-score support\n", + "\n", + " 0 0.80 0.80 0.80 58571\n", + " 1 0.80 0.80 0.80 58707\n", + "\n", + " accuracy 0.80 117278\n", + " macro avg 0.80 0.80 0.80 117278\n", + "weighted avg 0.80 0.80 0.80 117278\n", + "\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "Z5mZjeS77RbE", + "colab_type": "code", + "outputId": "1fa6778c-c412-4c0e-ccbf-b85f4d2b9f1d", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 235 + } + }, + "source": [ + "#oversampled output\n", + "sos = SMOTE(random_state=42)\n", + "x, y = sos.fit_resample(f, l)\n", + "f_train, f_test, l_train, l_test = train_test_split(x, y, test_size=0.20, random_state=42)\n", + "dg=tree.DecisionTreeClassifier()\n", + "dg.fit(f_train,l_train)\n", + "pred = dg.predict(f_test)\n", + "precision = precision_score(l_test, pred, average=\"weighted\")\n", + "recall = recall_score(l_test, pred, average=\"weighted\")\n", + "f1 = f1_score(l_test, pred, average=\"weighted\")\n", + "print (\"Precision:\", precision)\n", + "print (\"Recall:\", recall)\n", + "print (\"F1 Score:\", f1)\n", + "print(classification_report(l_test, pred))" + ], + "execution_count": 0, + "outputs": [ + { + "output_type": "stream", + "text": [ + "Precision: 0.9095492394117647\n", + "Recall: 0.909527823147671\n", + "F1 Score: 0.9095258643003094\n", + " precision recall f1-score support\n", + "\n", + " 0 0.91 0.91 0.91 228508\n", + " 1 0.91 0.91 0.91 229456\n", + "\n", + " accuracy 0.91 457964\n", + " macro avg 0.91 0.91 0.91 457964\n", + "weighted avg 0.91 0.91 0.91 457964\n", + "\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "BXF3EMUD9ix_", + "colab_type": "text" + }, + "source": [ + "**GRADIENT BOOSTING**" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "zVCw5IDa_IU3", + "colab_type": "code", + "colab": {} + }, + "source": [ + "#before sampling\n", + "gb = GradientBoostingClassifier()\n", + "gb.fit(f_train,l_train)\n" + ], + "execution_count": 0, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "AjsubeaO_khM", + "colab_type": "code", + "colab": {} + }, + "source": [ + "pred = gb.predict(f_test)\n", + "precision = precision_score(l_test, pred, average=\"weighted\")\n", + "recall = recall_score(l_test, pred, average=\"weighted\")\n", + "f1 = f1_score(l_test, pred, average=\"weighted\")\n", + "print (\"Precision:\", precision)\n", + "print (\"Recall:\", recall)\n", + "print (\"F1 Score:\", f1)" + ], + "execution_count": 0, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "nXRgEHI7_nIi", + "colab_type": "code", + "colab": {} + }, + "source": [ + "# undersampled output\n", + "rus = RandomUnderSampler(random_state=42)\n", + "x, y = rus.fit_resample(f, l)\n", + "f_train, f_test, l_train, l_test = train_test_split(x, y, test_size=0.20, random_state=42)\n", + "gb = GradientBoostingClassifier()\n", + "gb.fit(f_train,l_train)\n", + "pred = gb.predict(f_test)\n", + "precision = precision_score(l_test, pred, average=\"weighted\")\n", + "recall = recall_score(l_test, pred, average=\"weighted\")\n", + "f1 = f1_score(l_test, pred, average=\"weighted\")\n", + "print (\"Precision:\", precision)\n", + "print (\"Recall:\", recall)\n", + "print (\"F1 Score:\", f1)\n", + "print(classification_report(l_test, pred))" + ], + "execution_count": 0, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "3XVZ5-7D_1uC", + "colab_type": "code", + "colab": {} + }, + "source": [ + "#oversampled output\n", + "sos = SMOTE(random_state=42)\n", + "x, y = sos.fit_resample(f, l)\n", + "f_train, f_test, l_train, l_test = train_test_split(x, y, test_size=0.20, random_state=42)\n", + "gb = GradientBoostingClassifier()\n", + "gb.fit(f_train,l_train)\n", + "pred = gb.predict(f_test)\n", + "precision = precision_score(l_test, pred, average=\"weighted\")\n", + "recall = recall_score(l_test, pred, average=\"weighted\")\n", + "f1 = f1_score(l_test, pred, average=\"weighted\")\n", + "print (\"Precision:\", precision)\n", + "print (\"Recall:\", recall)\n", + "print (\"F1 Score:\", f1)\n", + "print(classification_report(l_test, pred))" + ], + "execution_count": 0, + "outputs": [] + } + ] +} \ No newline at end of file From 1d12eaee247cbd2b66cced0ea4103d2cb337645c Mon Sep 17 00:00:00 2001 From: pmannil <45764461+pmannil@users.noreply.github.com> Date: Tue, 23 Jul 2019 22:35:05 +0530 Subject: [PATCH 2/2] Delete module2.ipynb --- module2.ipynb | 1637 ------------------------------------------------- 1 file changed, 1637 deletions(-) delete mode 100644 module2.ipynb diff --git a/module2.ipynb b/module2.ipynb deleted file mode 100644 index 573e3159..00000000 --- a/module2.ipynb +++ /dev/null @@ -1,1637 +0,0 @@ -{ - "nbformat": 4, - "nbformat_minor": 0, - "metadata": { - "colab": { - "name": "module2.ipynb", - "version": "0.3.2", - "provenance": [], - "collapsed_sections": [], - "include_colab_link": true - }, - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - } - }, - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "id": "view-in-github", - "colab_type": "text" - }, - "source": [ - "\"Open" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "qmQOF7LIqSeI", - "colab_type": "code", - "outputId": "54ebb43f-1d54-4855-853f-178c3cb432dc", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 35 - } - }, - "source": [ - "from google.colab import drive\n", - "drive.mount('/content/gdrive')" - ], - "execution_count": 0, - "outputs": [ - { - "output_type": "stream", - "text": [ - "Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount(\"/content/gdrive\", force_remount=True).\n" - ], - "name": "stdout" - } - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "yvVHvIcFqW6i", - "colab_type": "code", - "outputId": "38033657-1d6a-4999-c6eb-f48cab2dd14f", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 74 - } - }, - "source": [ - "import pandas as pd\n", - "import numpy as np\n", - "import os\n", - "from sklearn.model_selection import train_test_split\n", - "from sklearn.ensemble import RandomForestClassifier\n", - "from sklearn.linear_model import LogisticRegression\n", - "from sklearn.ensemble import ExtraTreesClassifier\n", - "from sklearn import tree\n", - "from sklearn import svm\n", - "from sklearn.ensemble import GradientBoostingClassifier\n", - "from sklearn.metrics import accuracy_score\n", - "from sklearn.preprocessing import LabelEncoder\n", - "from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, classification_report\n", - "from imblearn.over_sampling import SMOTE\n", - "from imblearn.under_sampling import RandomUnderSampler\n", - "from google.colab import files" - ], - "execution_count": 0, - "outputs": [ - { - "output_type": "stream", - "text": [ - "/usr/local/lib/python3.6/dist-packages/sklearn/externals/six.py:31: DeprecationWarning: The module is deprecated in version 0.21 and will be removed in version 0.23 since we've dropped support for Python 2.7. Please rely on the official version of six (https://pypi.org/project/six/).\n", - " \"(https://pypi.org/project/six/).\", DeprecationWarning)\n" - ], - "name": "stderr" - } - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "tPr-qj5cqj_T", - "colab_type": "code", - "colab": {} - }, - "source": [ - "df=pd.read_csv('/content/gdrive/My Drive/1.csv')" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "dTvIVJcWw6Nf", - "colab_type": "code", - "outputId": "24eb7918-16a0-4116-9076-aefa570b6bce", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 305 - } - }, - "source": [ - "df.head()" - ], - "execution_count": 0, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
CloudcoverDewPointFHumidityPressureVisibiltyWeatherCodeWindChillFWindDirDegreeWindGustKmphWindSpeedKmphairportdateprecipMMtempFtimeYearQuarterMonthDayofMonthFlightDateOriginAirportIDOriginDestAirportIDDestCRSDepTimeDepTimeDepDelayMinutesDepDel15CRSArrTimeArrTimeArrDelayMinutesArrDel15nearest_hoursArrnearest_hoursDep
05770891021917674147105MCO2016-01-012.574020161112016-01-0113204MCO13303MIA163000.00.0173900.00.000
1877097101963027122674MCO2016-01-013.47150020161112016-01-0113204MCO13303MIA5005033.00.06035590.00.0600500
2877097101963027122674MCO2016-01-013.47150020161112016-01-0113204MCO11618EWR5305270.00.07597410.00.0800500
3847197101953027124063MCO2016-01-012.17260020161112016-01-0113204MCO11298DFW60061212.00.08018000.00.0800600
4847197101953027124063MCO2016-01-012.17260020161112016-01-0113204MCO12478JFK60462521.01.083085020.01.0900600
\n", - "
" - ], - "text/plain": [ - " Cloudcover DewPointF ... nearest_hoursArr nearest_hoursDep\n", - "0 57 70 ... 0 0\n", - "1 87 70 ... 600 500\n", - "2 87 70 ... 800 500\n", - "3 84 71 ... 800 600\n", - "4 84 71 ... 900 600\n", - "\n", - "[5 rows x 34 columns]" - ] - }, - "metadata": { - "tags": [] - }, - "execution_count": 6 - } - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "y-IaZ6c_xOk4", - "colab_type": "code", - "colab": {} - }, - "source": [ - "pf1=df.drop(['nearest_hoursArr', 'nearest_hoursDep'], axis=1)" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "txHM39lKxOnw", - "colab_type": "code", - "outputId": "f2216878-066d-4510-8a12-f2f0ccb133e9", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 305 - } - }, - "source": [ - "pf1.head()" - ], - "execution_count": 0, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
CloudcoverDewPointFHumidityPressureVisibiltyWeatherCodeWindChillFWindDirDegreeWindGustKmphWindSpeedKmphairportdateprecipMMtempFtimeYearQuarterMonthDayofMonthFlightDateOriginAirportIDOriginDestAirportIDDestCRSDepTimeDepTimeDepDelayMinutesDepDel15CRSArrTimeArrTimeArrDelayMinutesArrDel15
05770891021917674147105MCO2016-01-012.574020161112016-01-0113204MCO13303MIA163000.00.0173900.00.0
1877097101963027122674MCO2016-01-013.47150020161112016-01-0113204MCO13303MIA5005033.00.06035590.00.0
2877097101963027122674MCO2016-01-013.47150020161112016-01-0113204MCO11618EWR5305270.00.07597410.00.0
3847197101953027124063MCO2016-01-012.17260020161112016-01-0113204MCO11298DFW60061212.00.08018000.00.0
4847197101953027124063MCO2016-01-012.17260020161112016-01-0113204MCO12478JFK60462521.01.083085020.01.0
\n", - "
" - ], - "text/plain": [ - " Cloudcover DewPointF Humidity ... ArrTime ArrDelayMinutes ArrDel15\n", - "0 57 70 89 ... 0 0.0 0.0\n", - "1 87 70 97 ... 559 0.0 0.0\n", - "2 87 70 97 ... 741 0.0 0.0\n", - "3 84 71 97 ... 800 0.0 0.0\n", - "4 84 71 97 ... 850 20.0 1.0\n", - "\n", - "[5 rows x 32 columns]" - ] - }, - "metadata": { - "tags": [] - }, - "execution_count": 8 - } - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "stzNOS1TwzG5", - "colab_type": "code", - "outputId": "53a62a3e-c98d-4c40-bf4b-d1459af9cd90", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 35 - } - }, - "source": [ - "pf1.shape" - ], - "execution_count": 0, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "(1438104, 32)" - ] - }, - "metadata": { - "tags": [] - }, - "execution_count": 9 - } - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "Vhff2XcxymX3", - "colab_type": "code", - "colab": {} - }, - "source": [ - "need=pf1.drop([\"airport\",\"date\",\"DepTime\",\"ArrTime\",\"DepDelayMinutes\",\"ArrDelayMinutes\",\"ArrDel15\",\"DepDel15\",\"FlightDate\"],axis=1)" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "UO_eAKHD4y0l", - "colab_type": "code", - "colab": {} - }, - "source": [ - "k = LabelEncoder()\n", - "c = k.fit_transform(need[\"Dest\"])\n", - "need[\"Dest\"] = c\n", - "c = k.fit_transform(need[\"Origin\"])\n", - "need[\"Origin\"] = c\n", - "c = k.fit_transform(need[\"WeatherCode\"])\n", - "need[\"WeatherCode\"] = c\n", - "f=np.asarray(need)" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "mFpWvBj6x6lC", - "colab_type": "code", - "outputId": "90df5805-cd26-4e28-f606-28b921a681d9", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 35 - } - }, - "source": [ - "pf1['DepDel15']=pf1.DepDel15.astype(int)\n", - "l=np.asarray(pf1[\"DepDel15\"])\n", - "l" - ], - "execution_count": 0, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "array([0, 0, 0, ..., 1, 0, 0])" - ] - }, - "metadata": { - "tags": [] - }, - "execution_count": 8 - } - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "MFFaA4VE6cps", - "colab_type": "code", - "colab": {} - }, - "source": [ - "need.to_csv(\"/content/gdrive/My Drive/4.csv\")" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "7Fd3ODR385nW", - "colab_type": "code", - "colab": {} - }, - "source": [ - "f_train, f_test, l_train, l_test = train_test_split(f, l, test_size=0.20, random_state=42)" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "REiK6VhAJ3U_", - "colab_type": "text" - }, - "source": [ - "**EXTRA TREES**" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "bNgm8RKUKsJc", - "colab_type": "code", - "outputId": "31a92513-61b5-40bd-ee41-af339cd474af", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 74 - } - }, - "source": [ - "#before sampling\n", - "et = ExtraTreesClassifier()\n", - "et.fit(f_train,l_train)\n" - ], - "execution_count": 0, - "outputs": [ - { - "output_type": "stream", - "text": [ - "/usr/local/lib/python3.6/dist-packages/sklearn/ensemble/forest.py:245: FutureWarning: The default value of n_estimators will change from 10 in version 0.20 to 100 in 0.22.\n", - " \"10 in version 0.20 to 100 in 0.22.\", FutureWarning)\n" - ], - "name": "stderr" - } - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "dCZJfFgSMhAa", - "colab_type": "code", - "outputId": "d44111d6-ba10-464c-968c-267835fac336", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 244 - } - }, - "source": [ - "pred = et.predict(f_test)\n", - "precision = precision_score(l_test, pred, average=\"weighted\")\n", - "recall = recall_score(l_test, pred, average=\"weighted\")\n", - "f1 = f1_score(l_test, pred, average=\"weighted\")\n", - "print (\"Precision:\", precision)\n", - "print (\"Recall:\", recall)\n", - "print (\"F1 Score:\", f1)" - ], - "execution_count": 0, - "outputs": [ - { - "output_type": "error", - "ename": "NameError", - "evalue": "ignored", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", - "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mpred\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0met\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpredict\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mf_test\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2\u001b[0m \u001b[0mprecision\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mprecision_score\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0ml_test\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mpred\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0maverage\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m\"weighted\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0mrecall\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mrecall_score\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0ml_test\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mpred\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0maverage\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m\"weighted\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0mf1\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mf1_score\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0ml_test\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mpred\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0maverage\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m\"weighted\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0mprint\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0;34m\"Precision:\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mprecision\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;31mNameError\u001b[0m: name 'et' is not defined" - ] - } - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "51QiB7EMM51I", - "colab_type": "code", - "outputId": "204d38a3-8704-4db8-96ee-6a3c3d48b5ee", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 181 - } - }, - "source": [ - "print(classification_report(l_test, pred))" - ], - "execution_count": 0, - "outputs": [ - { - "output_type": "stream", - "text": [ - " precision recall f1-score support\n", - "\n", - " 0 0.87 0.95 0.91 228878\n", - " 1 0.71 0.45 0.55 58743\n", - "\n", - " accuracy 0.85 287621\n", - " macro avg 0.79 0.70 0.73 287621\n", - "weighted avg 0.84 0.85 0.84 287621\n", - "\n" - ], - "name": "stdout" - } - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "FWhcs0K2NYUh", - "colab_type": "code", - "outputId": "848d272e-9fe3-4823-c1f7-05981e399ba1", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 292 - } - }, - "source": [ - "#oversampled output\n", - "sos = SMOTE(random_state=42)\n", - "x, y = sos.fit_resample(f, l)\n", - "f_train, f_test, l_train, l_test = train_test_split(x, y, test_size=0.20, random_state=42)\n", - "rg =ExtraTreesClassifier()\n", - "rg.fit(f_train,l_train)\n", - "pred = rg.predict(f_test)\n", - "precision = precision_score(l_test, pred, average=\"weighted\")\n", - "recall = recall_score(l_test, pred, average=\"weighted\")\n", - "f1 = f1_score(l_test, pred, average=\"weighted\")\n", - "print (\"Precision:\", precision)\n", - "print (\"Recall:\", recall)\n", - "print (\"F1 Score:\", f1)\n", - "print(classification_report(l_test, pred))" - ], - "execution_count": 0, - "outputs": [ - { - "output_type": "stream", - "text": [ - "/usr/local/lib/python3.6/dist-packages/sklearn/ensemble/forest.py:245: FutureWarning: The default value of n_estimators will change from 10 in version 0.20 to 100 in 0.22.\n", - " \"10 in version 0.20 to 100 in 0.22.\", FutureWarning)\n" - ], - "name": "stderr" - }, - { - "output_type": "stream", - "text": [ - "Precision: 0.9068624599831894\n", - "Recall: 0.904870251810186\n", - "F1 Score: 0.9047613202127298\n", - " precision recall f1-score support\n", - "\n", - " 0 0.88 0.94 0.91 228508\n", - " 1 0.94 0.87 0.90 229456\n", - "\n", - " accuracy 0.90 457964\n", - " macro avg 0.91 0.90 0.90 457964\n", - "weighted avg 0.91 0.90 0.90 457964\n", - "\n" - ], - "name": "stdout" - } - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "_l-8802v1XcA", - "colab_type": "code", - "outputId": "6f1ba080-41ec-4841-88f8-6218ac580653", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 292 - } - }, - "source": [ - "\n", - "# Random Undersampling\n", - "ru = RandomUnderSampler(random_state=42)\n", - "x, y = ru.fit_resample(f, l)\n", - "f_train, f_test, l_train, l_test = train_test_split(x, y, test_size=0.20, random_state=42)\n", - "r =ExtraTreesClassifier()\n", - "r.fit(f_train,l_train)\n", - "pred = r.predict(f_test)\n", - "precision = precision_score(l_test, pred, average=\"weighted\")\n", - "recall = recall_score(l_test, pred, average=\"weighted\")\n", - "f1 = f1_score(l_test, pred, average=\"weighted\")\n", - "print (\"Precision:\", precision)\n", - "print (\"Recall:\", recall)\n", - "print (\"F1 Score:\", f1)\n", - "print(classification_report(l_test, pred))" - ], - "execution_count": 0, - "outputs": [ - { - "output_type": "stream", - "text": [ - "/usr/local/lib/python3.6/dist-packages/sklearn/ensemble/forest.py:245: FutureWarning: The default value of n_estimators will change from 10 in version 0.20 to 100 in 0.22.\n", - " \"10 in version 0.20 to 100 in 0.22.\", FutureWarning)\n" - ], - "name": "stderr" - }, - { - "output_type": "stream", - "text": [ - "Precision: 0.7472095814587808\n", - "Recall: 0.7444448234110405\n", - "F1 Score: 0.7437519324824997\n", - " precision recall f1-score support\n", - "\n", - " 0 0.72 0.80 0.76 58571\n", - " 1 0.77 0.69 0.73 58707\n", - "\n", - " accuracy 0.74 117278\n", - " macro avg 0.75 0.74 0.74 117278\n", - "weighted avg 0.75 0.74 0.74 117278\n", - "\n" - ], - "name": "stdout" - } - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "n4zyuBjQJgLA", - "colab_type": "text" - }, - "source": [ - "**RANDOM FOREST**" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "dOFgV7AT9b7B", - "colab_type": "code", - "outputId": "b4f072de-5cd4-4c42-8810-febbaf141067", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 201 - } - }, - "source": [ - "#before sampling\n", - "ran = RandomForestClassifier()\n", - "ran.fit(f_train,l_train)\n" - ], - "execution_count": 0, - "outputs": [ - { - "output_type": "stream", - "text": [ - "/usr/local/lib/python3.6/dist-packages/sklearn/ensemble/forest.py:245: FutureWarning: The default value of n_estimators will change from 10 in version 0.20 to 100 in 0.22.\n", - " \"10 in version 0.20 to 100 in 0.22.\", FutureWarning)\n" - ], - "name": "stderr" - }, - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',\n", - " max_depth=None, max_features='auto', max_leaf_nodes=None,\n", - " min_impurity_decrease=0.0, min_impurity_split=None,\n", - " min_samples_leaf=1, min_samples_split=2,\n", - " min_weight_fraction_leaf=0.0, n_estimators=10,\n", - " n_jobs=None, oob_score=False, random_state=None,\n", - " verbose=0, warm_start=False)" - ] - }, - "metadata": { - "tags": [] - }, - "execution_count": 18 - } - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "0b4C4gYb90kF", - "colab_type": "code", - "outputId": "f266aa34-dac5-4142-e4d6-c910f13ed814", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 72 - } - }, - "source": [ - "pred = ran.predict(f_test)\n", - "precision = precision_score(l_test, pred, average=\"weighted\")\n", - "recall = recall_score(l_test, pred, average=\"weighted\")\n", - "f1 = f1_score(l_test, pred, average=\"weighted\")\n", - "print (\"Precision:\", precision)\n", - "print (\"Recall:\", recall)\n", - "print (\"F1 Score:\", f1)" - ], - "execution_count": 0, - "outputs": [ - { - "output_type": "stream", - "text": [ - "Precision: 0.7908160255096451\n", - "Recall: 0.7880079810365115\n", - "F1 Score: 0.7875114091747006\n" - ], - "name": "stdout" - } - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "6lfk_-QPNBSp", - "colab_type": "code", - "outputId": "258d7f63-0288-47f1-8381-14cd5542d576", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 292 - } - }, - "source": [ - "#oversampled output\n", - "sos = SMOTE(random_state=42)\n", - "x, y = sos.fit_resample(f, l)\n", - "f_train, f_test, l_train, l_test = train_test_split(x, y, test_size=0.20, random_state=42)\n", - "rg = RandomForestClassifier()\n", - "rg.fit(f_train,l_train)\n", - "pred = rg.predict(f_test)\n", - "precision = precision_score(l_test, pred, average=\"weighted\")\n", - "recall = recall_score(l_test, pred, average=\"weighted\")\n", - "f1 = f1_score(l_test, pred, average=\"weighted\")\n", - "print (\"Precision:\", precision)\n", - "print (\"Recall:\", recall)\n", - "print (\"F1 Score:\", f1)\n", - "print(classification_report(l_test, pred))" - ], - "execution_count": 0, - "outputs": [ - { - "output_type": "stream", - "text": [ - "/usr/local/lib/python3.6/dist-packages/sklearn/ensemble/forest.py:245: FutureWarning: The default value of n_estimators will change from 10 in version 0.20 to 100 in 0.22.\n", - " \"10 in version 0.20 to 100 in 0.22.\", FutureWarning)\n" - ], - "name": "stderr" - }, - { - "output_type": "stream", - "text": [ - "Precision: 0.9146826332086521\n", - "Recall: 0.9114886759657964\n", - "F1 Score: 0.9113267780959348\n", - " precision recall f1-score support\n", - "\n", - " 0 0.88 0.96 0.92 228508\n", - " 1 0.95 0.87 0.91 229456\n", - "\n", - " accuracy 0.91 457964\n", - " macro avg 0.91 0.91 0.91 457964\n", - "weighted avg 0.91 0.91 0.91 457964\n", - "\n" - ], - "name": "stdout" - } - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "IQyqICuh4Rug", - "colab_type": "text" - }, - "source": [ - "**LOGISTIC REGRESSION**" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "JAXLCGcg4FPm", - "colab_type": "code", - "outputId": "3e574e8a-6b7e-4389-f615-884e7eeae5af", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 165 - } - }, - "source": [ - "#before sampling\n", - "lg = LogisticRegression()\n", - "lg.fit(f_train,l_train)\n" - ], - "execution_count": 0, - "outputs": [ - { - "output_type": "stream", - "text": [ - "/usr/local/lib/python3.6/dist-packages/sklearn/linear_model/logistic.py:432: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n", - " FutureWarning)\n" - ], - "name": "stderr" - }, - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,\n", - " intercept_scaling=1, l1_ratio=None, max_iter=100,\n", - " multi_class='warn', n_jobs=None, penalty='l2',\n", - " random_state=None, solver='warn', tol=0.0001, verbose=0,\n", - " warm_start=False)" - ] - }, - "metadata": { - "tags": [] - }, - "execution_count": 21 - } - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "1SfMUDnJ4ytt", - "colab_type": "code", - "outputId": "bfd786cf-17ce-4cd9-f90c-34de53419368", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 72 - } - }, - "source": [ - "pred = lg.predict(f_test)\n", - "precision = precision_score(l_test, pred, average=\"weighted\")\n", - "recall = recall_score(l_test, pred, average=\"weighted\")\n", - "f1 = f1_score(l_test, pred, average=\"weighted\")\n", - "print (\"Precision:\", precision)\n", - "print (\"Recall:\", recall)\n", - "print (\"F1 Score:\", f1)" - ], - "execution_count": 0, - "outputs": [ - { - "output_type": "stream", - "text": [ - "Precision: 0.6153303766290811\n", - "Recall: 0.6150046728563817\n", - "F1 Score: 0.6146723066459795\n" - ], - "name": "stdout" - } - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "MFpprCUv5IFC", - "colab_type": "code", - "outputId": "0b67b00e-edb5-409e-d633-2d42ba5ca8d3", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 292 - } - }, - "source": [ - "# undersampled output\n", - "rus = RandomUnderSampler(random_state=42)\n", - "x, y = rus.fit_resample(f, l)\n", - "f_train, f_test, l_train, l_test = train_test_split(x, y, test_size=0.20, random_state=42)\n", - "lg = LogisticRegression()\n", - "lg.fit(f_train,l_train)\n", - "pred = rg.predict(f_test)\n", - "precision = precision_score(l_test, pred, average=\"weighted\")\n", - "recall = recall_score(l_test, pred, average=\"weighted\")\n", - "f1 = f1_score(l_test, pred, average=\"weighted\")\n", - "print (\"Precision:\", precision)\n", - "print (\"Recall:\", recall)\n", - "print (\"F1 Score:\", f1)\n", - "print(classification_report(l_test, pred))" - ], - "execution_count": 0, - "outputs": [ - { - "output_type": "stream", - "text": [ - "/usr/local/lib/python3.6/dist-packages/sklearn/linear_model/logistic.py:432: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n", - " FutureWarning)\n" - ], - "name": "stderr" - }, - { - "output_type": "stream", - "text": [ - "Precision: 0.934776416365854\n", - "Recall: 0.9280427701700233\n", - "F1 Score: 0.9277686765240396\n", - " precision recall f1-score support\n", - "\n", - " 0 0.88 0.99 0.93 58571\n", - " 1 0.99 0.87 0.92 58707\n", - "\n", - " accuracy 0.93 117278\n", - " macro avg 0.93 0.93 0.93 117278\n", - "weighted avg 0.93 0.93 0.93 117278\n", - "\n" - ], - "name": "stdout" - } - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "YQMvmoss5mgK", - "colab_type": "code", - "outputId": "5f39c5ce-483c-4b18-e448-94e9ac6e5abf", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 292 - } - }, - "source": [ - "#oversampled output\n", - "sos = SMOTE(random_state=42)\n", - "x, y = sos.fit_resample(f, l)\n", - "f_train, f_test, l_train, l_test = train_test_split(x, y, test_size=0.20, random_state=42)\n", - "lg = LogisticRegression()\n", - "lg.fit(f_train,l_train)\n", - "pred = rg.predict(f_test)\n", - "precision = precision_score(l_test, pred, average=\"weighted\")\n", - "recall = recall_score(l_test, pred, average=\"weighted\")\n", - "f1 = f1_score(l_test, pred, average=\"weighted\")\n", - "print (\"Precision:\", precision)\n", - "print (\"Recall:\", recall)\n", - "print (\"F1 Score:\", f1)\n", - "print(classification_report(l_test, pred))" - ], - "execution_count": 0, - "outputs": [ - { - "output_type": "stream", - "text": [ - "/usr/local/lib/python3.6/dist-packages/sklearn/linear_model/logistic.py:432: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n", - " FutureWarning)\n" - ], - "name": "stderr" - }, - { - "output_type": "stream", - "text": [ - "Precision: 0.9146826332086521\n", - "Recall: 0.9114886759657964\n", - "F1 Score: 0.9113267780959348\n", - " precision recall f1-score support\n", - "\n", - " 0 0.88 0.96 0.92 228508\n", - " 1 0.95 0.87 0.91 229456\n", - "\n", - " accuracy 0.91 457964\n", - " macro avg 0.91 0.91 0.91 457964\n", - "weighted avg 0.91 0.91 0.91 457964\n", - "\n" - ], - "name": "stdout" - } - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "HJdqEtOI5_Dc", - "colab_type": "text" - }, - "source": [ - "**DECISION TREE**" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "pBb344bD6X7C", - "colab_type": "code", - "outputId": "2fb2420a-a225-410f-a1ff-187e0f12fc9a", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 126 - } - }, - "source": [ - "#before sampling\n", - "dg = tree.DecisionTreeClassifier()\n", - "dg.fit(f_train,l_train)\n" - ], - "execution_count": 0, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=None,\n", - " max_features=None, max_leaf_nodes=None,\n", - " min_impurity_decrease=0.0, min_impurity_split=None,\n", - " min_samples_leaf=1, min_samples_split=2,\n", - " min_weight_fraction_leaf=0.0, presort=False,\n", - " random_state=None, splitter='best')" - ] - }, - "metadata": { - "tags": [] - }, - "execution_count": 25 - } - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "6v0jyL876pXi", - "colab_type": "code", - "outputId": "0c1a6773-ca62-4079-ebb1-24a35146f07b", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 72 - } - }, - "source": [ - "pred = dg.predict(f_test)\n", - "precision = precision_score(l_test, pred, average=\"weighted\")\n", - "recall = recall_score(l_test, pred, average=\"weighted\")\n", - "f1 = f1_score(l_test, pred, average=\"weighted\")\n", - "print (\"Precision:\", precision)\n", - "print (\"Recall:\", recall)\n", - "print (\"F1 Score:\", f1)" - ], - "execution_count": 0, - "outputs": [ - { - "output_type": "stream", - "text": [ - "Precision: 0.9099632535050411\n", - "Recall: 0.9099405193421317\n", - "F1 Score: 0.9099384765292666\n" - ], - "name": "stdout" - } - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "yWOMxf4l65o0", - "colab_type": "code", - "outputId": "8bfabf59-a1f8-4cc5-9301-6541ad09548a", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 235 - } - }, - "source": [ - "# undersampled output\n", - "rus = RandomUnderSampler(random_state=42)\n", - "x, y = rus.fit_resample(f, l)\n", - "f_train, f_test, l_train, l_test = train_test_split(x, y, test_size=0.20, random_state=42)\n", - "dg=tree.DecisionTreeClassifier()\n", - "dg.fit(f_train,l_train)\n", - "pred = dg.predict(f_test)\n", - "precision = precision_score(l_test, pred, average=\"weighted\")\n", - "recall = recall_score(l_test, pred, average=\"weighted\")\n", - "f1 = f1_score(l_test, pred, average=\"weighted\")\n", - "print (\"Precision:\", precision)\n", - "print (\"Recall:\", recall)\n", - "print (\"F1 Score:\", f1)\n", - "print(classification_report(l_test, pred))" - ], - "execution_count": 0, - "outputs": [ - { - "output_type": "stream", - "text": [ - "Precision: 0.7962277290798526\n", - "Recall: 0.7962277665035216\n", - "F1 Score: 0.796227680870621\n", - " precision recall f1-score support\n", - "\n", - " 0 0.80 0.80 0.80 58571\n", - " 1 0.80 0.80 0.80 58707\n", - "\n", - " accuracy 0.80 117278\n", - " macro avg 0.80 0.80 0.80 117278\n", - "weighted avg 0.80 0.80 0.80 117278\n", - "\n" - ], - "name": "stdout" - } - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "Z5mZjeS77RbE", - "colab_type": "code", - "outputId": "1fa6778c-c412-4c0e-ccbf-b85f4d2b9f1d", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 235 - } - }, - "source": [ - "#oversampled output\n", - "sos = SMOTE(random_state=42)\n", - "x, y = sos.fit_resample(f, l)\n", - "f_train, f_test, l_train, l_test = train_test_split(x, y, test_size=0.20, random_state=42)\n", - "dg=tree.DecisionTreeClassifier()\n", - "dg.fit(f_train,l_train)\n", - "pred = dg.predict(f_test)\n", - "precision = precision_score(l_test, pred, average=\"weighted\")\n", - "recall = recall_score(l_test, pred, average=\"weighted\")\n", - "f1 = f1_score(l_test, pred, average=\"weighted\")\n", - "print (\"Precision:\", precision)\n", - "print (\"Recall:\", recall)\n", - "print (\"F1 Score:\", f1)\n", - "print(classification_report(l_test, pred))" - ], - "execution_count": 0, - "outputs": [ - { - "output_type": "stream", - "text": [ - "Precision: 0.9095492394117647\n", - "Recall: 0.909527823147671\n", - "F1 Score: 0.9095258643003094\n", - " precision recall f1-score support\n", - "\n", - " 0 0.91 0.91 0.91 228508\n", - " 1 0.91 0.91 0.91 229456\n", - "\n", - " accuracy 0.91 457964\n", - " macro avg 0.91 0.91 0.91 457964\n", - "weighted avg 0.91 0.91 0.91 457964\n", - "\n" - ], - "name": "stdout" - } - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "BXF3EMUD9ix_", - "colab_type": "text" - }, - "source": [ - "**GRADIENT BOOSTING**" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "zVCw5IDa_IU3", - "colab_type": "code", - "colab": {} - }, - "source": [ - "#before sampling\n", - "gb = GradientBoostingClassifier()\n", - "gb.fit(f_train,l_train)\n" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "AjsubeaO_khM", - "colab_type": "code", - "colab": {} - }, - "source": [ - "pred = gb.predict(f_test)\n", - "precision = precision_score(l_test, pred, average=\"weighted\")\n", - "recall = recall_score(l_test, pred, average=\"weighted\")\n", - "f1 = f1_score(l_test, pred, average=\"weighted\")\n", - "print (\"Precision:\", precision)\n", - "print (\"Recall:\", recall)\n", - "print (\"F1 Score:\", f1)" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "nXRgEHI7_nIi", - "colab_type": "code", - "colab": {} - }, - "source": [ - "# undersampled output\n", - "rus = RandomUnderSampler(random_state=42)\n", - "x, y = rus.fit_resample(f, l)\n", - "f_train, f_test, l_train, l_test = train_test_split(x, y, test_size=0.20, random_state=42)\n", - "gb = GradientBoostingClassifier()\n", - "gb.fit(f_train,l_train)\n", - "pred = gb.predict(f_test)\n", - "precision = precision_score(l_test, pred, average=\"weighted\")\n", - "recall = recall_score(l_test, pred, average=\"weighted\")\n", - "f1 = f1_score(l_test, pred, average=\"weighted\")\n", - "print (\"Precision:\", precision)\n", - "print (\"Recall:\", recall)\n", - "print (\"F1 Score:\", f1)\n", - "print(classification_report(l_test, pred))" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "3XVZ5-7D_1uC", - "colab_type": "code", - "colab": {} - }, - "source": [ - "#oversampled output\n", - "sos = SMOTE(random_state=42)\n", - "x, y = sos.fit_resample(f, l)\n", - "f_train, f_test, l_train, l_test = train_test_split(x, y, test_size=0.20, random_state=42)\n", - "gb = GradientBoostingClassifier()\n", - "gb.fit(f_train,l_train)\n", - "pred = gb.predict(f_test)\n", - "precision = precision_score(l_test, pred, average=\"weighted\")\n", - "recall = recall_score(l_test, pred, average=\"weighted\")\n", - "f1 = f1_score(l_test, pred, average=\"weighted\")\n", - "print (\"Precision:\", precision)\n", - "print (\"Recall:\", recall)\n", - "print (\"F1 Score:\", f1)\n", - "print(classification_report(l_test, pred))" - ], - "execution_count": 0, - "outputs": [] - } - ] -} \ No newline at end of file