Page 1 of 1

Q3. Remove each parameter one by one and find their effect on the model. import pandas as pd import numpy as np from skl

Posted: Mon Jun 06, 2022 1:49 pm
by answerhappygod
Q3. Remove each parameter one by one and find its effect on
the model.
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score
from sklearn import tree
# Train and evaluate a decision tree on the tennis dataset, then export the
# fitted tree in Graphviz format to 'tree.dot'.
tennis_data = pd.read_csv("tennis_dataset.csv")
print(tennis_data)

# Convert categorical data to numerical data.
tennis_data['outlook'] = tennis_data['outlook'].map(
    {'sunny': 0, 'overcast': 1, 'rainy': 2})
# pandas.read_csv parses a column that holds only TRUE/FALSE as bool, in which
# case the string keys below would never match and every row would become NaN.
# Normalising to upper-case text first handles both the bool and the str case.
windy_text = tennis_data['windy'].astype(str).str.strip().str.upper()
tennis_data['windy'] = windy_text.map({'FALSE': 0, 'TRUE': 1})
tennis_data['play'] = tennis_data['play'].map({'no': 0, 'yes': 1})
print(tennis_data)

# Split data into training and testing data.
# NOTE(review): the feature slice takes column positions 0..2 only, so the
# column at position 3 is never used as a feature. If that column is 'windy'
# (encoded above), confirm whether excluding it is intentional.
X = tennis_data.values[:, 0:3]
# Cast the labels explicitly: `.values` yields the frame's common dtype, which
# may be float or object; integer class labels are what the classifier expects.
# astype(int) also fails loudly if any label was left unmapped (NaN).
Y = tennis_data.iloc[:, 4].astype(int).to_numpy()
X_train, X_test, y_train, y_test = train_test_split(
    X, Y, test_size=0.3, random_state=100)

# Create a decision tree classifier (fixed random_state for reproducibility).
clf_gini = DecisionTreeClassifier(
    criterion="gini", random_state=100, max_depth=3, min_samples_leaf=5)
clf_gini.fit(X_train, y_train)

# Predict the response for the test dataset.
y_pred = clf_gini.predict(X_test)
print(y_pred)

# Calculate accuracy as a percentage.
print("Accuracy is ", accuracy_score(y_test, y_pred)*100)

# Visualize the decision tree: writes a Graphviz description to 'tree.dot'.
tree.export_graphviz(clf_gini, out_file='tree.dot')
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
dataset:
tennis_dataset.csv :