Você está na página 1de 6

EJ1SEMANA3

November 4, 2022

[1]: #Machine Learning IPN - Trabajo 22 marzo 2021


#Andres Nares Monroy


---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)

<ipython-input-1-971af1f3062c> in <module>
----> 1 EJ1SEMANA3-PRUEBA

NameError: name 'EJ1SEMANA3' is not defined

[1]: import pandas as pd


iris = pd.read_csv("C:\\Users\ASTRONAUTA\Documents\ATOM\iris-semana3\iris2.
,→csv", header=None, names=["SL", "SW", "PL", "PW", "C"])

iris.head(10)

[1]: SL SW PL PW C
0 5.1 3.5 1.4 0.2 Iris-setosa
1 4.9 3.0 1.4 0.2 Iris-setosa
2 4.7 3.2 1.3 0.2 Iris-setosa
3 4.6 3.1 1.5 0.2 Iris-setosa
4 5.0 3.6 1.4 0.2 Iris-setosa
5 5.4 3.9 1.7 0.4 Iris-setosa
6 4.6 3.4 1.4 0.3 Iris-setosa
7 5.0 3.4 1.5 0.2 Iris-setosa
8 4.4 2.9 1.4 0.2 Iris-setosa
9 4.9 3.1 1.5 0.1 Iris-setosa

[4]: import numpy as np


iris["C1"] = iris["C"].map(lambda c: 1 if c == "Iris-setosa" else 0)
iris["C2"] = iris["C"].map(lambda c: 1 if c == "Iris-versicolor" else 0)
iris["C3"] = iris["C"].map(lambda c: 1 if c == "Iris-virginica" else 0)

1
x1 = np.array(iris["SL"])
x2 = np.array(iris["SW"])
x3 = np.array(iris["PL"])
x4 = np.array(iris["PW"])

y1 = np.array(iris["C1"])
y2 = np.array(iris["C2"])
y3 = np.array(iris["C3"])

X = np.array([
x1,
x2,
x3,
x4
]).T

Y = np.array([
y1,
y2,
y3
]).T

[ ]:

[5]: #Parte el conjunto de muestras X en X_train con 120 muestras y X_test con las␣
,→muestras restantes.

#Parte el conjunto de clases codificadas Y en Y_train con 120 clases y Y_test␣


,→con las clases restantes.

len(X)
X_train = X[:120] # del principio hasta 120
X_test = X[120:] # de 120 hasta el final
Y_train = Y[:120] # del principio hasta 120
Y_test = Y[120:] # de 120 hasta el final
Y_test

[5]: array([[0, 0, 1],


[0, 0, 1],
[0, 0, 1],
[0, 0, 1],
[0, 0, 1],
[0, 0, 1],
[0, 0, 1],
[0, 0, 1],
[0, 0, 1],
[0, 0, 1],
[0, 0, 1],

2
[0, 0, 1],
[0, 0, 1],
[0, 0, 1],
[0, 0, 1],
[0, 0, 1],
[0, 0, 1],
[0, 0, 1],
[0, 0, 1],
[0, 0, 1],
[0, 0, 1],
[0, 0, 1],
[0, 0, 1],
[0, 0, 1],
[0, 0, 1],
[0, 0, 1],
[0, 0, 1],
[0, 0, 1],
[0, 0, 1],
[0, 0, 1]], dtype=int64)

[6]: len(Y)

[6]: 150

[ ]:

[7]: #Crea un clasificador por árbol de decisión con sklearn.tree.


,→DesicionTreeClassifier usando X_train y Y_train. Pista: usa clf.fit(X_train,␣

,→Y_train).

X_train = X[:120] # del principio hasta 120


X_test = X[120:] # de 120 hasta el final

from sklearn.tree import DecisionTreeClassifier, plot_tree

clf = DecisionTreeClassifier()

clf.fit(X_train, Y_train)

plot_tree(clf)

[7]: [Text(148.8, 195.696, 'X[3] <= 0.8\ngini = 0.417\nsamples = 120\nvalue = [[70,


50]\n[70, 50]\n[100, 20]]'),
Text(111.60000000000001, 152.208, 'gini = 0.0\nsamples = 50\nvalue = [[0,
50]\n[50, 0]\n[50, 0]]'),
Text(186.0, 152.208, 'X[2] <= 4.95\ngini = 0.272\nsamples = 70\nvalue = [[70,
0]\n[20, 50]\n[50, 20]]'),

3
Text(111.60000000000001, 108.72, 'X[0] <= 4.95\ngini = 0.027\nsamples =
49\nvalue = [[49, 0]\n[1, 48]\n[48, 1]]'),
Text(74.4, 65.232, 'X[3] <= 1.35\ngini = 0.333\nsamples = 2\nvalue = [[2,
0]\n[1, 1]\n[1, 1]]'),
Text(37.2, 21.744, 'gini = 0.0\nsamples = 1\nvalue = [[1, 0]\n[0, 1]\n[1,
0]]'),
Text(111.60000000000001, 21.744, 'gini = 0.0\nsamples = 1\nvalue = [[1, 0]\n[1,
0]\n[0, 1]]'),
Text(148.8, 65.232, 'gini = 0.0\nsamples = 47\nvalue = [[47, 0]\n[0, 47]\n[47,
0]]'),
Text(260.40000000000003, 108.72, 'X[3] <= 1.75\ngini = 0.115\nsamples =
21\nvalue = [[21, 0]\n[19, 2]\n[2, 19]]'),
Text(223.20000000000002, 65.232, 'X[3] <= 1.55\ngini = 0.296\nsamples =
3\nvalue = [[3, 0]\n[1, 2]\n[2, 1]]'),
Text(186.0, 21.744, 'gini = 0.0\nsamples = 1\nvalue = [[1, 0]\n[1, 0]\n[0,
1]]'),
Text(260.40000000000003, 21.744, 'gini = 0.0\nsamples = 2\nvalue = [[2, 0]\n[0,
2]\n[2, 0]]'),
Text(297.6, 65.232, 'gini = 0.0\nsamples = 18\nvalue = [[18, 0]\n[18, 0]\n[0,
18]]')]

[8]: #Muestra el score para X_test y Y_test. Pista: usa clf.score(X_test, Y_test).
clf.score(X_test, Y_test)

[8]: 0.8

4
[9]: #Pinta el árbol de decisión usando sklearn.tree.plot_tree(clf)
import sklearn
sklearn.tree.plot_tree(clf)

[9]: [Text(148.8, 195.696, 'X[3] <= 0.8\ngini = 0.417\nsamples = 120\nvalue = [[70,


50]\n[70, 50]\n[100, 20]]'),
Text(111.60000000000001, 152.208, 'gini = 0.0\nsamples = 50\nvalue = [[0,
50]\n[50, 0]\n[50, 0]]'),
Text(186.0, 152.208, 'X[2] <= 4.95\ngini = 0.272\nsamples = 70\nvalue = [[70,
0]\n[20, 50]\n[50, 20]]'),
Text(111.60000000000001, 108.72, 'X[0] <= 4.95\ngini = 0.027\nsamples =
49\nvalue = [[49, 0]\n[1, 48]\n[48, 1]]'),
Text(74.4, 65.232, 'X[3] <= 1.35\ngini = 0.333\nsamples = 2\nvalue = [[2,
0]\n[1, 1]\n[1, 1]]'),
Text(37.2, 21.744, 'gini = 0.0\nsamples = 1\nvalue = [[1, 0]\n[0, 1]\n[1,
0]]'),
Text(111.60000000000001, 21.744, 'gini = 0.0\nsamples = 1\nvalue = [[1, 0]\n[1,
0]\n[0, 1]]'),
Text(148.8, 65.232, 'gini = 0.0\nsamples = 47\nvalue = [[47, 0]\n[0, 47]\n[47,
0]]'),
Text(260.40000000000003, 108.72, 'X[3] <= 1.75\ngini = 0.115\nsamples =
21\nvalue = [[21, 0]\n[19, 2]\n[2, 19]]'),
Text(223.20000000000002, 65.232, 'X[3] <= 1.55\ngini = 0.296\nsamples =
3\nvalue = [[3, 0]\n[1, 2]\n[2, 1]]'),
Text(186.0, 21.744, 'gini = 0.0\nsamples = 1\nvalue = [[1, 0]\n[1, 0]\n[0,
1]]'),
Text(260.40000000000003, 21.744, 'gini = 0.0\nsamples = 2\nvalue = [[2, 0]\n[0,
2]\n[2, 0]]'),
Text(297.6, 65.232, 'gini = 0.0\nsamples = 18\nvalue = [[18, 0]\n[18, 0]\n[0,
18]]')]

5
[10]: # Predice cuál es la clase de las siguientes muestras:

#[
# [6.7, 3. , 5. , 1.7],
# [5.5, 2.6, 4.4, 1.2],
# [4.5, 2.3, 1.3, 0.3],
# [4.6, 3.1, 1.5, 0.2],
#]

clf.predict([
[6.7, 3. , 5. , 1.7],
[5.5, 2.6, 4.4, 1.2],
[4.5, 2.3, 1.3, 0.3],
[4.6, 3.1, 1.5, 0.2],
])

[10]: array([[0, 1, 0],


[0, 1, 0],
[1, 0, 0],
[1, 0, 0]], dtype=int64)

[ ]:

Você também pode gostar