
MACHINE LEARNING QUESTIONNAIRE - PART I

Name: Wallison Santos Ferreira

Year: 2023

Course: Machine Learning

Professor: Salomão Machado Mafalda

Notes: When evaluating the impact of the techniques, keep the neural network with the same architecture, that is, the same size, number of neurons, and number of layers. This makes the comparison of each technique's impact on the network fairer. Also, use a different problem for each question. In each question, leave one block for the architecture code, another with the plot of the results, and finally your comment on the observed behavior. Generate a PDF and submit it on Classroom.


Imports
import os
os.makedirs('utils_ex', exist_ok=True)
!wget -q https://github.com/mafaldasalomao/pavic_treinamento_ml/raw/main/utils/plot.py -O utils_ex/plot.py
!wget -q https://github.com/mafaldasalomao/pavic_treinamento_ml/raw/main/utils/samples_generator.py -O utils_ex/samples_generator.py

import numpy as np
import _pickle as pkl
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris
from sklearn.datasets import make_blobs, make_circles, make_moons, make_classification

from sklearn.metrics import accuracy_score


from sklearn.preprocessing import MinMaxScaler, OneHotEncoder
from utils_ex import plot
from utils_ex.samples_generator import make_spiral, make_square, make_cubic, make_exp, make_log10

%matplotlib inline

Activation Functions

def linear(x, derivative=False):


return np.ones_like(x) if derivative else x

def sigmoid(x, derivative=False):


if derivative:
y = sigmoid(x)
return y*(1 - y)
return 1.0/(1.0 + np.exp(-x))

def tanh(x, derivative=False):


if derivative:
y = tanh(x)
return 1 - y**2
return (np.exp(x) - np.exp(-x))/(np.exp(x) + np.exp(-x))

def relu(x, derivative=False):


if derivative:
return np.where(x <= 0, 0, 1)
return np.maximum(0, x)

def leaky_relu(x, derivative=False):


alpha = 0.1
if derivative:
return np.where(x <= 0, alpha, 1)
return np.where(x <= 0, alpha*x, x)

def elu(x, derivative=False):


alpha = 1.0
if derivative:
y = elu(x)
return np.where(x <= 0, y + alpha, 1)
return np.where(x <= 0, alpha*(np.exp(x) - 1), x)

# Auxiliary
def softmax(x, y_oh=None, derivative=False):
if derivative:
y_pred = softmax(x)
k = np.nonzero(y_pred * y_oh)
pk = y_pred[k]
y_pred[k] = pk * (1.0 - pk)
return y_pred
exp = np.exp(x)
return exp / np.sum(exp, axis=1, keepdims=True)
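
As a quick sanity check (my own sketch, not part of the original notebook), the analytical derivatives above can be compared against a central finite difference at a few sample points; they should agree closely away from the kink at x = 0.

xs = np.array([[-2.0, -0.5, 0.3, 1.7]])
eps = 1e-5
for f in (sigmoid, tanh, relu, leaky_relu, elu):
    numeric = (f(xs + eps) - f(xs - eps)) / (2 * eps)   # central finite difference
    print(f.__name__, np.allclose(f(xs, derivative=True), numeric, atol=1e-4))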

Cost Functions

##### Regression
def mae(y, y_pred, derivative=False):
if derivative:
return np.where(y_pred > y, 1, -1) / y.shape[0]
return np.mean(np.abs(y - y_pred))

def mse(y, y_pred, derivative=False):


if derivative:
return - (y - y_pred) / y.shape[0]
return 0.5 * np.mean((y - y_pred)**2)

##### Binary classification


def binary_cross_entropy(y, y_pred, derivative=False):
if derivative:
return -(y - y_pred) / (y_pred * (1-y_pred) * y.shape[0])
return -np.mean(y*np.log(y_pred) + (1 - y)*np.log(1-y_pred))

def sigmoid_cross_entropy(y, y_pred, derivative=False):


y_sigmoid = sigmoid(y_pred)
if derivative:
return -(y - y_sigmoid) / y.shape[0]
return -np.mean(y * np.log(y_sigmoid) + (1 - y) * np.log(1-y_sigmoid))

##### Multi-class classification


def neg_log_likelihood(y_oh, y_pred, derivative=False):
k = np.nonzero(y_pred * y_oh)
pk = y_pred[k]
if derivative:
y_pred[k] = (-1.0 / pk)
return y_pred
return np.mean(-np.log(pk))

def softmax_neg_log_likelihood(y_oh, y_pred, derivative=False):


y_softmax = softmax(y_pred)
if derivative:
return -(y_oh - y_softmax) / y_oh.shape[0]
return neg_log_likelihood(y_oh, y_softmax)
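
A small usage sketch (the example values are my own) showing the cost value and the gradient shape returned by the regression and binary-classification losses above:

y_true = np.array([[0.], [1.], [1.], [0.]])
y_hat  = np.array([[0.1], [0.8], [0.6], [0.3]])
print('mse:', mse(y_true, y_hat), mse(y_true, y_hat, derivative=True).shape)
print('bce:', binary_cross_entropy(y_true, y_hat),
      binary_cross_entropy(y_true, y_hat, derivative=True).shape)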

Weight Initialization

# Weight initialization
def zeros(rows, cols): #in, out
return np.zeros((rows, cols))

def ones(rows, cols):


return np.ones((rows, cols))

def random_normal(rows, cols):


return np.random.randn(rows, cols)

def random_uniform(rows, cols):
    return np.random.rand(rows, cols)

def glorot_normal(rows, cols):


std_dev = np.sqrt(2.0 / (rows + cols))
return std_dev * np.random.randn(rows, cols)

def glorot_uniform(rows, cols):
    limit = np.sqrt(6.0 / (rows + cols))
    return 2 * limit * np.random.rand(rows, cols) - limit
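
A quick comparison (my own check) of the spread produced by each initializer for a 256x256 layer; the Glorot variants keep the standard deviation far smaller than plain random_normal, which helps keep the early forward passes stable.

for init in (zeros, ones, random_normal, random_uniform, glorot_normal, glorot_uniform):
    w = init(256, 256)
    print('{:15s} mean={:+.3f} std={:.3f}'.format(init.__name__, w.mean(), w.std()))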

Regularization
def l1_regularization(weights, derivative=False):
if derivative:
weights = [np.where(w < 0, -1, w) for w in weights]
return np.array([np.where(w > 0, 1, w) for w in weights])
return np.sum([np.sum(np.abs(w)) for w in weights])
def l2_regularization(weights, derivative=False):
if derivative:
return weights
return 0.5 * np.sum(weights**2)
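
A small sketch (hand-picked weight matrix of my own) of the penalty and the gradient each regularizer returns:

w_demo = np.array([[0.5, -2.0], [0.0, 3.0]])
print('L1 penalty :', l1_regularization(w_demo))                   # sum of |w| = 5.5
print('L1 gradient:', l1_regularization(w_demo, derivative=True))  # sign(w)
print('L2 penalty :', l2_regularization(w_demo))                   # 0.5 * sum(w**2) = 6.625
print('L2 gradient:', l2_regularization(w_demo, derivative=True))  # the weights themselves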

Batch Generator


def batch_sequencial(x, y, batch_size=None):
batch_size = x.shape[0] if batch_size is None else batch_size
n_batches = x.shape[0] // batch_size

for batch in range(n_batches):


offset = batch_size * batch
x_batch, y_batch = x[offset:offset+batch_size], y[offset:offset+batch_size]
yield (x_batch, y_batch)

def batch_shuffle(x, y, batch_size=None):


shuffle_index = np.random.permutation(range(x.shape[0]))
return batch_sequencial(x[shuffle_index], y[shuffle_index], batch_size)
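
Usage sketch (toy arrays of my own) showing how the generators above are consumed; with 10 samples and batch_size=4 only two batches are yielded, since the integer division in n_batches drops the incomplete last batch.

x_toy = np.arange(20).reshape(10, 2)
y_toy = np.arange(10).reshape(-1, 1)
for xb, yb in batch_shuffle(x_toy, y_toy, batch_size=4):
    print(xb.shape, yb.shape)   # two batches of shape (4, 2) / (4, 1)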

Learning Rate Decay


def time_based_decay(learning_rate, epoch, decay_rate, decay_steps=1):
    return learning_rate / (1 + decay_rate * epoch)
def exponential_decay(learning_rate, epoch, decay_rate, decay_steps=1):
return learning_rate * decay_rate ** epoch
def stair_case_decay(learning_rate, epoch, decay_rate, decay_steps=1):
return learning_rate * decay_rate ** (epoch // decay_steps)
def none_decay(learning_rate, epoch, decay_rate, decay_steps=1):
return learning_rate
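
A quick look (decay_rate and decay_steps values chosen arbitrarily by me) at how each schedule shrinks an initial learning rate of 0.1 over the epochs:

for epoch in (0, 100, 200, 400):
    print(epoch,
          round(time_based_decay(0.1, epoch, decay_rate=0.01), 5),
          round(exponential_decay(0.1, epoch, decay_rate=0.99), 5),
          round(stair_case_decay(0.1, epoch, decay_rate=0.5, decay_steps=100), 5))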

Batch Normalization


def batchnorm_forward(layer, x, is_training=True):
mu = np.mean(x, axis=0) if is_training else layer._pop_mean
var = np.var(x, axis=0) if is_training else layer._pop_var
x_norm = (x - mu) / np.sqrt(var + 1e-8)
out = layer.gamma * x_norm + layer.betta

if is_training:
layer._pop_mean = layer.bn_decay * layer._pop_mean + (1.0 - layer.bn_decay) * mu
layer._pop_var = layer.bn_decay * layer._pop_var + (1.0 - layer.bn_decay) * var
layer._bn_cache = (x, x_norm, mu, var)
return out

def batchnorm_backward(layer, dactivation):


x, x_norm, mu, var = layer._bn_cache

m = layer._activ_inp.shape[0]
x_mu = x - mu
std_inv = 1. / np.sqrt(var + 1e-8)

dx_norm = dactivation * layer.gamma


dvar = np.sum(dx_norm * x_mu, axis=0) * -0.5 * (std_inv **3)
dmu = np.sum(dx_norm * -std_inv, axis=0) + dvar * np.mean(-2.0 * x_mu, axis=0)

dx = (dx_norm * std_inv) + (dvar * 2.0 * x_mu / m) + (dmu / m)


layer._dgamma = np.sum(dactivation * x_norm, axis=0)
layer._dbetta = np.sum(dactivation, axis=0)
return dx
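
A quick check of batchnorm_forward using a lightweight stand-in object (SimpleNamespace is my own shortcut here; the real Layer class is defined in the next section): the normalized output should come out roughly zero-mean and unit-variance per feature before gamma/betta rescale it.

from types import SimpleNamespace

bn_layer = SimpleNamespace(gamma=ones(1, 5), betta=zeros(1, 5), bn_decay=0.9,
                           _pop_mean=zeros(1, 5), _pop_var=zeros(1, 5), _bn_cache=None)
x_demo = np.random.randn(64, 5) * 3.0 + 7.0   # activations far from zero mean / unit variance
out = batchnorm_forward(bn_layer, x_demo, is_training=True)
print(out.mean(axis=0).round(3), out.std(axis=0).round(3))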

Neural Network Implementation

class Layer():
def __init__(self, input_dim, output_dim,
weights_initializer=random_normal,
bias_initializer=ones, dropout_prob=0,
reg_func=l2_regularization,
reg_strength=0.0,
batch_norm=False,
bn_decay=0.9,
is_trainable=True,
activation=linear):
self.input = None
self.weights = weights_initializer(output_dim, input_dim)
self.biases = bias_initializer(1, output_dim)
self.activation = activation
self.dropout_prob = dropout_prob
self.reg_func = reg_func
self.reg_strength = reg_strength
self._dropout_mask = None
self._activ_inp, self._activ_out = None, None
self._dweights, self._dbiases, self._prev_dweights = None, None, 0.0
self.is_trainable = is_trainable
self.batch_norm = batch_norm
self.bn_decay = bn_decay
self.gamma, self.betta = ones(1, output_dim), zeros(1, output_dim)

self._dgamma, self._dbetta = None, None


self._pop_mean, self._pop_var = zeros(1, output_dim), zeros(1, output_dim)
self._bn_cache = None

class NeuralNetwork():
def __init__(self, cost_func=mse, learning_rate=1e-3,
lr_decay_method=none_decay,
lr_decay_rate=0.0,
lr_decay_steps=1,
patience=np.inf,
momentum=0.0):
self.layers = []
self.cost_func = cost_func
self.momentum = momentum
self.learning_rate = self.lr_initial = learning_rate
self.lr_decay_method = lr_decay_method
self.lr_decay_rate = lr_decay_rate
self.lr_decay_steps = lr_decay_steps

self.patience, self.waiting = patience, 0


self._best_model, self._best_loss = self.layers, np.inf

    def fit(self, x_train, y_train, x_val=None, y_val=None, epochs=100, verbose=10, batch_gen=batch_sequencial, batch_size=None):


        x_val, y_val = (x_train, y_train) if (x_val is None or y_val is None) else (x_val, y_val)

for epoch in range(epochs+1):


            self.learning_rate = self.lr_decay_method(self.lr_initial, epoch, self.lr_decay_rate, self.lr_decay_steps)
for x_batch, y_batch in batch_gen(x_train, y_train, batch_size):


y_pred = self.__feedforward(x_batch)
y_pred = self.__backprop(y_batch, y_pred)

loss_val = self.cost_func(y_val, self.predict(x_val))


if loss_val < self._best_loss:
self._best_model, self._best_loss = self.layers, loss_val
self.waiting = 0
else:
self.waiting += 1
if self.waiting > self.patience:
self.layers = self._best_model
print(f'Early Stopping at {epoch} epoch')
return

if epoch % verbose == 0:
loss_train = self.cost_func(y_train, self.predict(x_train))
                loss_reg = (1.0 / y_train.shape[0]) * np.sum([layer.reg_strength * layer.reg_func(layer.weights) for layer in self.layers])
                print("Epoch: {0:=4}/{1} loss_train: {2:.8f}+{3:.8f} = {4:.8f} loss_val= {5:.8f}".format(epoch, epochs, loss_train, loss_reg, loss_train + loss_reg, loss_val))

def predict(self, x):


return self.__feedforward(x, is_training=False)

#Save model
def save(self, filepath):
pkl.dump(self, open(filepath, 'wb'), -1)
#Load model
def load(filepath):
return pkl.load(open(filepath, 'rb'))

def __feedforward(self, x, is_training=True):


self.layers[0].input = x

for current_layer, next_layer in zip(self.layers, self.layers[1:] + [Layer(0,0)]):


y = np.dot(current_layer.input, current_layer.weights.T) + current_layer.biases
            y = batchnorm_forward(current_layer, y, is_training) if current_layer.batch_norm else y
            current_layer._dropout_mask = np.random.binomial(1, 1.0 - current_layer.dropout_prob, y.shape) / (1.0 - current_layer.dropout_prob)
            current_layer._activ_inp = y
            current_layer._activ_out = next_layer.input = current_layer.activation(y) * (current_layer._dropout_mask if is_training else 1.0)
return self.layers[-1]._activ_out

def __backprop(self, y, y_pred):


last_delta = self.cost_func(y, y_pred, derivative=True)
        # compute the derivative for each layer
for layer in reversed(self.layers):
            dactivation = layer.activation(layer._activ_inp, derivative=True) * last_delta * layer._dropout_mask

            dactivation = batchnorm_backward(layer, dactivation) if layer.batch_norm else dactivation

last_delta = np.dot(dactivation, layer.weights)


layer._dweights = np.dot(dactivation.T, layer.input)


layer._dbiases = 1.0 * dactivation.sum(axis=0, keepdims=True)

for layer in reversed(self.layers):


if layer.is_trainable:
                layer._dweights = layer._dweights + (1.0 / y.shape[0]) * layer.reg_strength * layer.reg_func(layer.weights, derivative=True)
                layer._prev_dweights = - self.learning_rate * layer._dweights + self.momentum * layer._prev_dweights
layer.weights = layer.weights + layer._prev_dweights
layer.biases = layer.biases - self.learning_rate * layer._dbiases

if layer.batch_norm:
layer.gamma = layer.gamma - self.learning_rate * layer._dgamma
layer.betta = layer.betta - self.learning_rate * layer._dbetta
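
A minimal end-to-end usage sketch (tiny synthetic data of my own, just to exercise the API defined above): fit a small network on y = 2x and round-trip it through the save/load helpers.

x_demo = np.linspace(-1, 1, 50).reshape(-1, 1)
y_demo = 2.0 * x_demo

demo_nn = NeuralNetwork(cost_func=mse, learning_rate=0.1)
demo_nn.layers.append(Layer(1, 8, weights_initializer=glorot_normal, activation=tanh))
demo_nn.layers.append(Layer(8, 1, weights_initializer=glorot_normal, activation=linear))
demo_nn.fit(x_demo, y_demo, epochs=200, verbose=100)

demo_nn.save('demo_model.pkl')
restored = NeuralNetwork.load('demo_model.pkl')
print('reloaded loss:', mse(y_demo, restored.predict(x_demo)))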

Questions

Question 01 - In your own words, define: overfitting, underfitting, gradient vector, learning rate, epochs, layers, neuron, cost function, and activation function.

Answer 1:

Overfitting: occurs when the model is too large for the amount of data. The model fits the training data more closely than necessary and is therefore unable to generalize the problem to new input examples. Accuracy tends to be exaggeratedly high on the training set, but tends to fail on new data.

Underfitting: occurs when the model is too small for the amount of data. The model tends to describe the problem very poorly, because its capacity is not enough to match the complexity of the problem. It tends to make many errors already on the training set, and the error on new data tends to be just as large, since an overly simple model cannot capture the structure of the data (which is not what we want from machine learning).

Gradient vector: a vector that gives the direction in which a point on the cost function's surface should be moved in order to reach a lower level. The gradient is normal to the level curve at that point, and its negative tells us the direction in which the learner's error is minimized.

Learning rate: the size of the step the algorithm repeatedly takes until it reaches the desired result. When the step size is small, many iterations (steps) are needed to get there. When the step is too large, the result is reached faster, but precision is lost, because a large step risks overshooting the optimal value (a small numerical sketch of this trade-off follows below).
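
An illustrative sketch (a toy function of my own, not part of the original notebook): gradient descent on f(w) = w**2, where a small learning rate converges slowly and an overly large one overshoots and diverges.

def grad(w):
    return 2 * w                      # derivative of f(w) = w**2

for lr in (0.05, 1.05):
    w = 5.0
    for _ in range(20):
        w = w - lr * grad(w)          # step in the direction of the negative gradient
    print('lr =', lr, '-> w after 20 steps =', round(w, 4))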

Epochs: receiving a set of samples, running the network's operations (multiplication, addition, and activation function), evaluating the cost function, and updating the weights (if any) make up one epoch. An epoch corresponds to one pass over the set of samples, whether as a single batch or as mini-batches, up to the point where a result is produced and the weights are updated. These operations are usually repeated many times, until a predefined stopping criterion is reached or until the result satisfies the intended target.

Layers: the building blocks of a neural network, each one representing a set of neurons. Input and output layers are the most common, but a neural network may also have intermediate (hidden) layers, which are used to handle non-linear or more complex problems.

Neuron: a unit in a neural network used to hold inputs or the results of operations between weights and inputs in the network.

Cost function: the function that guides the algorithm's learning by measuring how far the output is from the true value.

Activation function: the activation function propagates what has been learned between the layers of the network and decides whether, and how much, a neuron contributes to its output.

Question 02 - Build a fully connected neural network, then train and test it with at least two different weight initialization methods. Then comment on the differences observed between the chosen methods.

x, y = make_cubic(n_samples=100, x_min=-4, x_max=4, a=1, b=0, c=-10, d=0, noise=3)


print(x.shape, y.shape)
plt.figure(figsize=(12, 6))

plt.subplot(1, 3, 1)
plt.scatter(x, y)

minmax = MinMaxScaler(feature_range=(-1, 1))


x = minmax.fit_transform(x.astype(np.float64))

input_dim, output_dim = x.shape[1], y.shape[1]

##### RANDOM NORMAL


nn = NeuralNetwork(cost_func=mse, learning_rate=0.1)
nn.layers.append(Layer(input_dim=input_dim, output_dim=10, weights_initializer=random_normal, activation=sigmoid))
nn.layers.append(Layer(10, 10, weights_initializer=random_normal, activation=sigmoid))
nn.layers.append(Layer(10, output_dim, weights_initializer=random_normal, activation=linear))

# train
nn.fit(x, y, epochs=5000, verbose=500)

# plot
plt.subplot(1, 3, 2)
plt.scatter(x, y)
plt.title('RANDOM NORMAL')
plt.plot(x, nn.predict(x), c='purple');

print('#'*20)

##### ONES
nn = NeuralNetwork(cost_func=mse, learning_rate=0.1)
nn.layers.append(Layer(input_dim=input_dim, output_dim=10, weights_initializer=ones, activation=sigmoid))
nn.layers.append(Layer(10, 10, weights_initializer=ones, activation=sigmoid))
nn.layers.append(Layer(10, output_dim, weights_initializer=ones, activation=linear))

# train
nn.fit(x, y, epochs=5000, verbose=500)

# plot
plt.subplot(1, 3, 3)
plt.scatter(x, y)
plt.title('ONES')
plt.plot(x, nn.predict(x), c='purple');

(100, 1) (100, 1)
Epoch: 0/5000 loss_train: 52.02215732+0.00000000 = 52.02215732 loss_val= 52.022157
Epoch: 500/5000 loss_train: 11.58474496+0.00000000 = 11.58474496 loss_val= 11.584744
Epoch: 1000/5000 loss_train: 3.50942976+0.00000000 = 3.50942976 loss_val= 3.50942976
Epoch: 1500/5000 loss_train: 2.62644518+0.00000000 = 2.62644518 loss_val= 2.62644518
Epoch: 2000/5000 loss_train: 2.21694160+0.00000000 = 2.21694160 loss_val= 2.21694160
Epoch: 2500/5000 loss_train: 1.93857296+0.00000000 = 1.93857296 loss_val= 1.93857296
Epoch: 3000/5000 loss_train: 1.76738271+0.00000000 = 1.76738271 loss_val= 1.76738271
Epoch: 3500/5000 loss_train: 1.66114758+0.00000000 = 1.66114758 loss_val= 1.66114758
Epoch: 4000/5000 loss_train: 1.59073900+0.00000000 = 1.59073900 loss_val= 1.59073900
Epoch: 4500/5000 loss_train: 1.54010967+0.00000000 = 1.54010967 loss_val= 1.54010967
Epoch: 5000/5000 loss_train: 1.49754506+0.00000000 = 1.49754506 loss_val= 1.49754506
####################
Epoch: 0/5000 loss_train: 51.85490543+0.00000000 = 51.85490543 loss_val= 51.854905
Epoch: 500/5000 loss_train: 51.27017807+0.00000000 = 51.27017807 loss_val= 51.270178
Epoch: 1000/5000 loss_train: 51.27013299+0.00000000 = 51.27013299 loss_val= 51.270132
Epoch: 1500/5000 loss_train: 51.27008458+0.00000000 = 51.27008458 loss_val= 51.270084
Epoch: 2000/5000 loss_train: 51.27003147+0.00000000 = 51.27003147 loss_val= 51.270031
Epoch: 2500/5000 loss_train: 51.26997194+0.00000000 = 51.26997194 loss_val= 51.269971
Epoch: 3000/5000 loss_train: 51.26990371+0.00000000 = 51.26990371 loss_val= 51.269903
Epoch: 3500/5000 loss_train: 51.26982366+0.00000000 = 51.26982366 loss_val= 51.269823
Epoch: 4000/5000 loss_train: 51.26972738+0.00000000 = 51.26972738 loss_val= 51.269727
Epoch: 4500/5000 loss_train: 51.26960831+0.00000000 = 51.26960831 loss_val= 51.269608
Epoch: 5000/5000 loss_train: 51.26945633+0.00000000 = 51.26945633 loss_val= 51.269456

Above we see the training of two similar networks, one initializing its weights randomly and the other initializing all weights with 1's. Random initialization gave a better result, because the varied weights give the model more freedom to fit the data.

Random Normal: epoch 5000 - loss_train 1.49754506

Ones: epoch 5000 - loss_train 51.26945633

Question 03 - Build a fully connected neural network, then train and test it with different activation layers. Then comment on what differences could be observed in the network's behavior when changing the activation layers (keep the same number of layers).

# Generating the data
x, y = make_log10(n_samples=100, x_min=1, x_max=100, noise=0.3)
minmax = MinMaxScaler(feature_range=(-1, 1))
x = minmax.fit_transform(x.astype(np.float64))

plt.figure(figsize=(12, 6))

plt.subplot(1,3,1)
plt.title('LOG-10')
plt.scatter(x, y);

# SIGMOID
nn = NeuralNetwork(cost_func=mae, learning_rate=1e-1)

nn.layers.append(Layer(input_dim=input_dim, output_dim=10, activation=linear))


nn.layers.append(Layer(input_dim=10, output_dim=20, activation=sigmoid))
nn.layers.append(Layer(input_dim=20, output_dim=10, activation=sigmoid))
nn.layers.append(Layer(input_dim=10, output_dim=output_dim, activation=linear))

# train
nn.fit(x, y, epochs=5000, verbose=500)

# plot
plt.subplot(1, 3, 2)
plt.scatter(x, y)
plt.title('SIGMOID')
plt.plot(x, nn.predict(x), c='purple');

print('#'*20)

nn = NeuralNetwork(cost_func=mae, learning_rate=1e-1)

nn.layers.append(Layer(input_dim=input_dim, output_dim=10, activation=linear))


nn.layers.append(Layer(input_dim=10, output_dim=20, activation=tanh))
nn.layers.append(Layer(input_dim=20, output_dim=10, activation=tanh))
nn.layers.append(Layer(input_dim=10, output_dim=output_dim, activation=linear))

# train
nn.fit(x, y, epochs=5000, verbose=500)

# plot
plt.subplot(1, 3, 3)
plt.scatter(x, y)
plt.title('TANH')
plt.plot(x, nn.predict(x), c='purple');

Epoch: 0 / 5000 loss_train 2.42138316


Epoch: 500 / 5000 loss_train 0.26303659
Epoch: 1000 / 5000 loss_train 0.24027691
Epoch: 1500 / 5000 loss_train 0.23261892
Epoch: 2000 / 5000 loss_train 0.21618289
Epoch: 2500 / 5000 loss_train 0.21992310
Epoch: 3000 / 5000 loss_train 0.22892049
Epoch: 3500 / 5000 loss_train 0.21861949
Epoch: 4000 / 5000 loss_train 0.15180707
Epoch: 4500 / 5000 loss_train 0.18418501
Epoch: 5000 / 5000 loss_train 0.15056345
####################
Epoch: 0 / 5000 loss_train 2.56018430
Epoch: 500 / 5000 loss_train 0.26898435
Epoch: 1000 / 5000 loss_train 0.23744392
Epoch: 1500 / 5000 loss_train 0.18477465
Epoch: 2000 / 5000 loss_train 0.21922092
Epoch: 2500 / 5000 loss_train 0.20938884
Epoch: 3000 / 5000 loss_train 0.19934186
Epoch: 3500 / 5000 loss_train 0.20878215
Epoch: 4000 / 5000 loss_train 0.20377527
Epoch: 4500 / 5000 loss_train 0.21859780
Epoch: 5000 / 5000 loss_train 0.20820884

The architecture with sigmoid activation functions performed better than the one with tanh. Sigmoid allowed the algorithm to learn faster and generalize the dataset better (starting at 1.04 and reaching 0.17), whereas tanh took much longer to learn, even though it started from a good position (from 0.68 down to 0.20).

Sigmoid: epoch 5000 - loss_train 0.17920404

Tanh: epoch 5000 - loss_train 0.20055233


Question 04 - Build a fully connected neural network, then train and test it with and without at least one of the regularization methods (L1, L2). Then comment on the differences observed between a network with regularization and one without (keep the same number of layers).

x, y = make_circles(n_samples=1000, noise=0.1, factor=0.4, random_state=616)


y = y.reshape(-1, 1)

print(x.shape, y.shape)

(1000, 2) (1000, 1)

plt.figure(figsize=(12, 6))

plt.subplot(1,2,1)
plt.scatter(x[:, 0], x[:,1])

plt.subplot(1,2,2)
plt.scatter(x[:, 0], x[:,1], c=list(np.array(y).ravel()), s=15, cmap='bwr')

<matplotlib.collections.PathCollection at 0x7efc5bd8db10>

import numpy as np
import matplotlib.pyplot as plt

def __softmax(x):
exp = np.exp(x)
return exp / np.sum(exp, axis=1, keepdims=True)

def __compute_meshgrid(x, y):


    x_min, x_max, y_min, y_max = x[:, 0].min(), x[:, 0].max(), x[:, 1].min(), x[:, 1].max()
x1, x2 = np.meshgrid(np.linspace(x_min, x_max, 100), np.linspace(y_min, y_max, 100))
x_mesh = np.array([x1.ravel(), x2.ravel()]).T
return x1, x2, x_mesh

def classification_predictions_custom(x, y, is_binary, nn=None, threshold=0.0, figsize=(12, 6),
                                      s=15, cmap='viridis', title="Acc", acc=None, size=1, order=1):
    ax = plt.subplot(1, size, order)
if nn is not None:
x1, x2, x_mesh = __compute_meshgrid(x, y)
y_mesh = nn.predict(x_mesh)
        y_mesh = np.where(y_mesh <= threshold, 0, 1) if is_binary else np.argmax(__softmax(y_mesh), axis=1)

plt.scatter(x[:, 0], x[:, 1], c=list(np.array(y).ravel()), s=s, cmap=cmap)


title = title + " = " + str(acc)
plt.title(title)
plt.contourf(x1, x2, y_mesh.reshape(x1.shape), cmap=cmap, alpha=0.5)

input_dim, output_dim = x.shape[1], y.shape[1]

plt.figure(figsize=(12,6))

########################## NO REG
print("No regularization")
nn = NeuralNetwork(cost_func=binary_cross_entropy, learning_rate=0.1)
nn.layers.append(Layer(input_dim, 4, activation=tanh))
nn.layers.append(Layer(4, output_dim, activation=sigmoid))

nn.fit(x, y, epochs=5000, verbose=300)

y_pred = nn.predict(x)
print("")
acc = 100*accuracy_score(y, y_pred >= 0.5)
print("Acc: {:.2f}%".format(acc))
# plt.subplot(1,3,1)
classification_predictions_custom(x, y, is_binary=True, nn=nn, threshold=0.5, cmap='bwr', title="Sem reg.", acc=acc, size=3, order=1)

########################## REG L1
print("Reg. L1")
nn = NeuralNetwork(cost_func=binary_cross_entropy, learning_rate=0.1)
nn.layers.append(Layer(input_dim, 4, reg_strength=1, reg_func=l1_regularization, activation=tanh))
nn.layers.append(Layer(4, output_dim, reg_strength=1, reg_func=l1_regularization, activation=sigmoid))

nn.fit(x, y, epochs=5000, verbose=300)

y_pred = nn.predict(x)
print("")
acc = 100*accuracy_score(y, y_pred >= 0.5)
print("Acc: {:.2f}%".format(acc))
# plt.subplot(1,3,2)
classification_predictions_custom(x, y, is_binary=True, nn=nn, threshold=0.5, title="L1", cmap='bwr', acc=acc, size=3, order=2)

########################## REG L2
print("Reg. L2")
nn = NeuralNetwork(cost_func=binary_cross_entropy, learning_rate=0.1)
nn.layers.append(Layer(input_dim, 4, reg_strength=1, reg_func=l2_regularization, activation=tanh))
nn.layers.append(Layer(4, output_dim, reg_strength=1, reg_func=l2_regularization, activation=sigmoid))

nn.fit(x, y, epochs=5000, verbose=300)

y_pred = nn.predict(x)
print("")
acc = 100*accuracy_score(y, y_pred >= 0.5)
print("Acc: {:.2f}%".format(acc))
# plt.subplot(1,3,3)
classification_predictions_custom(x, y, is_binary=True, nn=nn, threshold=0.5, title="L2", cmap='bwr', acc=acc, size=3, order=3)

## TODO: fix this plot

No regularization
Epoch: 0/5000 loss_train: 1.25499428+0.00000000 = 1.25499428 loss_val= 1.25499428
Epoch: 300/5000 loss_train: 0.50035096+0.00000000 = 0.50035096 loss_val= 0.50035096
Epoch: 600/5000 loss_train: 0.47104859+0.00000000 = 0.47104859 loss_val= 0.47104859
Epoch: 900/5000 loss_train: 0.45824521+0.00000000 = 0.45824521 loss_val= 0.45824521
Epoch: 1200/5000 loss_train: 0.44121130+0.00000000 = 0.44121130 loss_val= 0.44121130
Epoch: 1500/5000 loss_train: 0.32592766+0.00000000 = 0.32592766 loss_val= 0.32592766
Epoch: 1800/5000 loss_train: 0.17397664+0.00000000 = 0.17397664 loss_val= 0.17397664
Epoch: 2100/5000 loss_train: 0.12822026+0.00000000 = 0.12822026 loss_val= 0.12822026
Epoch: 2400/5000 loss_train: 0.10520166+0.00000000 = 0.10520166 loss_val= 0.10520166
Epoch: 2700/5000 loss_train: 0.09066580+0.00000000 = 0.09066580 loss_val= 0.09066580
Epoch: 3000/5000 loss_train: 0.08066445+0.00000000 = 0.08066445 loss_val= 0.08066445
Epoch: 3300/5000 loss_train: 0.07338149+0.00000000 = 0.07338149 loss_val= 0.07338149
Epoch: 3600/5000 loss_train: 0.06784440+0.00000000 = 0.06784440 loss_val= 0.06784440
Epoch: 3900/5000 loss_train: 0.06348810+0.00000000 = 0.06348810 loss_val= 0.06348810
Epoch: 4200/5000 loss_train: 0.05996379+0.00000000 = 0.05996379 loss_val= 0.05996379
Epoch: 4500/5000 loss_train: 0.05704555+0.00000000 = 0.05704555 loss_val= 0.05704555
Epoch: 4800/5000 loss_train: 0.05458118+0.00000000 = 0.05458118 loss_val= 0.05458118

Acc: 99.20%
Reg. L1
Epoch: 0/5000 loss_train: 1.16225572+0.01135547 = 1.17361119 loss_val= 1.16225572
Epoch: 300/5000 loss_train: 0.56666175+0.01129756 = 0.57795931 loss_val= 0.56666175
Epoch: 600/5000 loss_train: 0.35651010+0.01705934 = 0.37356943 loss_val= 0.35651010
Epoch: 900/5000 loss_train: 0.22100639+0.02180753 = 0.24281392 loss_val= 0.22100639
Epoch: 1200/5000 loss_train: 0.15758751+0.02506111 = 0.18264862 loss_val= 0.15758751
Epoch: 1500/5000 loss_train: 0.12403943+0.02735860 = 0.15139803 loss_val= 0.12403943
Epoch: 1800/5000 loss_train: 0.10394831+0.02910691 = 0.13305522 loss_val= 0.10394831
Epoch: 2100/5000 loss_train: 0.09076833+0.03048246 = 0.12125079 loss_val= 0.09076833
Epoch: 2400/5000 loss_train: 0.08151950+0.03159980 = 0.11311930 loss_val= 0.08151950
Epoch: 2700/5000 loss_train: 0.07468847+0.03252880 = 0.10721728 loss_val= 0.07468847
Epoch: 3000/5000 loss_train: 0.06943716+0.03331523 = 0.10275238 loss_val= 0.06943716
Epoch: 3300/5000 loss_train: 0.06526839+0.03399044 = 0.09925883 loss_val= 0.06526839
Epoch: 3600/5000 loss_train: 0.06187063+0.03457694 = 0.09644757 loss_val= 0.06187063
Epoch: 3900/5000 loss_train: 0.05903965+0.03509113 = 0.09413078 loss_val= 0.05903965
Epoch: 4200/5000 loss_train: 0.05663660+0.03554562 = 0.09218222 loss_val= 0.05663660
Epoch: 4500/5000 loss_train: 0.05456406+0.03595011 = 0.09051417 loss_val= 0.05456406
Epoch: 4800/5000 loss_train: 0.05275187+0.03631211 = 0.08906399 loss_val= 0.05275187

Acc: 99.30%
Reg. L2
Epoch: 0/5000 loss_train: 0.83712390+0.01131259 = 0.84843648 loss_val= 0.83712390
Epoch: 300/5000 loss_train: 0.51195874+0.01297594 = 0.52493468 loss_val= 0.51195874
Epoch: 600/5000 loss_train: 0.47224830+0.01509715 = 0.48734545 loss_val= 0.47224830
Epoch: 900/5000 loss_train: 0.45692211+0.01639424 = 0.47331635 loss_val= 0.45692211
Epoch: 1200/5000 loss_train: 0.44806017+0.01735986 = 0.46542003 loss_val= 0.44806017
Epoch: 1500/5000 loss_train: 0.44203160+0.01814479 = 0.46017639 loss_val= 0.44203160
Epoch: 1800/5000 loss_train: 0.43764067+0.01880240 = 0.45644307 loss_val= 0.43764067
Epoch: 2100/5000 loss_train: 0.43429940+0.01937348 = 0.45367288 loss_val= 0.43429940
Epoch: 2400/5000 loss_train: 0.43166165+0.01991236 = 0.45157401 loss_val= 0.43166165
Epoch: 2700/5000 loss_train: 0.42946812+0.02040003 = 0.44986815 loss_val= 0.42946812
Epoch: 3000/5000 loss_train: 0.42754075+0.02084857 = 0.44838932 loss_val= 0.42754075
Epoch: 3300/5000 loss_train: 0.42573809+0.02126642 = 0.44700451 loss_val= 0.42573809
Epoch: 3600/5000 loss_train: 0.42391697+0.02166277 = 0.44557974 loss_val= 0.42391697
Epoch: 3900/5000 loss_train: 0.42186947+0.02204725 = 0.44391672 loss_val= 0.42186947
Epoch: 4200/5000 loss_train: 0.41909523+0.02243179 = 0.44152702 loss_val= 0.41909523
Epoch: 4500/5000 loss_train: 0.41354388+0.02283853 = 0.43638241 loss_val= 0.41354388
Epoch: 4800/5000 loss_train: 0.38809625+0.02335534 = 0.41145159 loss_val= 0.38809625

Acc: 85.20%

Comment: In the results above, for the circle-separation problem, L1 regularization performed best, reaching 99.3% accuracy, compared with no regularization and with L2 regularization. However, since the gain in accuracy was not very significant and came at a cost in performance for the algorithm, the technique without regularization proves more viable for this case, with 99.2% accuracy (NO REG).

Question 05 - Build a fully connected neural network, then train and test it with different batch sizes. Then comment on the network's behavior with the different batch sizes.

x, y = make_log10(n_samples=1000, x_min=1, x_max=100, noise=0.5)

print(x.shape, y.shape)
plt.scatter(x, y)

(1000, 1) (1000, 1)
<matplotlib.collections.PathCollection at 0x7efc4c0cac50>

from sklearn.model_selection import train_test_split

minmax = MinMaxScaler(feature_range=(-1, 1))


x = minmax.fit_transform(x.astype(np.float64))

x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2)


input_dim, output_dim = x.shape[1], y.shape[1]

plt.figure(figsize=(12, 6))

plt.subplot(1,5,1)
plt.title('LOG-10')
plt.scatter(x, y);

# SIGMOID
nn = NeuralNetwork(cost_func=mae, learning_rate=1e-1)

nn.layers.append(Layer(input_dim=input_dim, output_dim=10, activation=tanh))


nn.layers.append(Layer(input_dim=10, output_dim=20, activation=sigmoid))
nn.layers.append(Layer(input_dim=20, output_dim=10, activation=sigmoid))
nn.layers.append(Layer(input_dim=10, output_dim=output_dim, activation=linear))

# train
nn.fit(x, y, epochs=5000, verbose=500)

# plot
plt.subplot(1, 5, 2)
plt.scatter(x, y)
plt.title('LOG-10')
plt.plot(x, nn.predict(x), c='purple');

# SIGMOID BATCH SIZE-10


nn = NeuralNetwork(cost_func=mae, learning_rate=1e-1)

nn.layers.append(Layer(input_dim=input_dim, output_dim=10, activation=tanh))


nn.layers.append(Layer(input_dim=10, output_dim=20, activation=sigmoid))
nn.layers.append(Layer(input_dim=20, output_dim=10, activation=sigmoid))
nn.layers.append(Layer(input_dim=10, output_dim=output_dim, activation=linear))

# train
nn.fit(x, y, epochs=5000, verbose=500, batch_size=10)

# plot
plt.subplot(1, 5, 3)
plt.scatter(x, y)
plt.title('LOG-10 BATCHSIZE-10')
plt.plot(x, nn.predict(x), c='purple');

# SIGMOID BATCH SIZE-20


nn = NeuralNetwork(cost_func=mae, learning_rate=1e-1)

nn.layers.append(Layer(input_dim=input_dim, output_dim=10, activation=tanh))


nn.layers.append(Layer(input_dim=10, output_dim=20, activation=sigmoid))
nn.layers.append(Layer(input_dim=20, output_dim=10, activation=sigmoid))
nn.layers.append(Layer(input_dim=10, output_dim=output_dim, activation=linear))

# train
nn.fit(x, y, epochs=5000, verbose=500, batch_size=20)

# plot
plt.subplot(1, 5, 4)
plt.scatter(x, y)
plt.title('LOG-10 BATCHSIZE-20')
plt.plot(x, nn.predict(x), c='purple');

# SIGMOID BATCH SIZE-200


nn = NeuralNetwork(cost_func=mae, learning_rate=1e-1)

nn.layers.append(Layer(input_dim=input_dim, output_dim=10, activation=tanh))


nn.layers.append(Layer(input_dim=10, output_dim=20, activation=sigmoid))
nn.layers.append(Layer(input_dim=20, output_dim=10, activation=sigmoid))
nn.layers.append(Layer(input_dim=10, output_dim=output_dim, activation=linear))

# train
nn.fit(x, y, epochs=5000, verbose=500, batch_size=200)

# plot
plt.subplot(1, 5, 5)
plt.scatter(x, y)
plt.title('LOG-10 BATCHSIZE-200')
plt.plot(x, nn.predict(x), c='purple');

Epoch: 0/5000 loss_train: 0.41718360+0.00000000 = 0.41718360 loss_val= 0.41718360


Epoch: 500/5000 loss_train: 0.28398282+0.00000000 = 0.28398282 loss_val= 0.28398282
Epoch: 1000/5000 loss_train: 0.28301177+0.00000000 = 0.28301177 loss_val= 0.28301177
Epoch: 1500/5000 loss_train: 0.26637989+0.00000000 = 0.26637989 loss_val= 0.26637989
Epoch: 2000/5000 loss_train: 0.26505081+0.00000000 = 0.26505081 loss_val= 0.26505081
Epoch: 2500/5000 loss_train: 0.25934362+0.00000000 = 0.25934362 loss_val= 0.25934362
Epoch: 3000/5000 loss_train: 0.26008505+0.00000000 = 0.26008505 loss_val= 0.26008505
Epoch: 3500/5000 loss_train: 0.25840404+0.00000000 = 0.25840404 loss_val= 0.25840404
Epoch: 4000/5000 loss_train: 0.25748994+0.00000000 = 0.25748994 loss_val= 0.25748994
Epoch: 4500/5000 loss_train: 0.25761894+0.00000000 = 0.25761894 loss_val= 0.25761894
Epoch: 5000/5000 loss_train: 0.25413410+0.00000000 = 0.25413410 loss_val= 0.25413410
Epoch: 0/5000 loss_train: 1.16293029+0.00000000 = 1.16293029 loss_val= 1.16293029
Epoch: 500/5000 loss_train: 0.37305178+0.00000000 = 0.37305178 loss_val= 0.37305178
Epoch: 1000/5000 loss_train: 0.32171143+0.00000000 = 0.32171143 loss_val= 0.32171143
Epoch: 1500/5000 loss_train: 0.28559506+0.00000000 = 0.28559506 loss_val= 0.28559506
Epoch: 2000/5000 loss_train: 0.27237239+0.00000000 = 0.27237239 loss_val= 0.27237239
Epoch: 2500/5000 loss_train: 0.26247792+0.00000000 = 0.26247792 loss_val= 0.26247792
Epoch: 3000/5000 loss_train: 0.25987444+0.00000000 = 0.25987444 loss_val= 0.25987444
Epoch: 3500/5000 loss_train: 0.25689866+0.00000000 = 0.25689866 loss_val= 0.25689866
Epoch: 4000/5000 loss_train: 0.25705919+0.00000000 = 0.25705919 loss_val= 0.25705919
Epoch: 4500/5000 loss_train: 0.26024497+0.00000000 = 0.26024497 loss_val= 0.26024497
Epoch: 5000/5000 loss_train: 0.25715778+0.00000000 = 0.25715778 loss_val= 0.25715778
Epoch: 0/5000 loss_train: 1.58840494+0.00000000 = 1.58840494 loss_val= 1.58840494
Epoch: 500/5000 loss_train: 0.28727072+0.00000000 = 0.28727072 loss_val= 0.28727072
Epoch: 1000/5000 loss_train: 0.25679710+0.00000000 = 0.25679710 loss_val= 0.25679710
Epoch: 1500/5000 loss_train: 0.25637869+0.00000000 = 0.25637869 loss_val= 0.25637869
Epoch: 2000/5000 loss_train: 0.25631161+0.00000000 = 0.25631161 loss_val= 0.25631161
Epoch: 2500/5000 loss_train: 0.25602505+0.00000000 = 0.25602505 loss_val= 0.25602505
Epoch: 3000/5000 loss_train: 0.25552504+0.00000000 = 0.25552504 loss_val= 0.25552504
Epoch: 3500/5000 loss_train: 0.25614263+0.00000000 = 0.25614263 loss_val= 0.25614263
Epoch: 4000/5000 loss_train: 0.25753295+0.00000000 = 0.25753295 loss_val= 0.25753295
Epoch: 4500/5000 loss_train: 0.25636169+0.00000000 = 0.25636169 loss_val= 0.25636169
Epoch: 5000/5000 loss_train: 0.25666044+0.00000000 = 0.25666044 loss_val= 0.25666044
Epoch: 0/5000 loss_train: 1.01518110+0.00000000 = 1.01518110 loss_val= 1.01518110
Epoch: 500/5000 loss_train: 0.26898900+0.00000000 = 0.26898900 loss_val= 0.26898900
Epoch: 1000/5000 loss_train: 0.25925264+0.00000000 = 0.25925264 loss_val= 0.25925264
Epoch: 1500/5000 loss_train: 0.25146399+0.00000000 = 0.25146399 loss_val= 0.25146399
Epoch: 2000/5000 loss_train: 0.25213393+0.00000000 = 0.25213393 loss_val= 0.25213393
Epoch: 2500/5000 loss_train: 0.25018044+0.00000000 = 0.25018044 loss_val= 0.25018044
Epoch: 3000/5000 loss_train: 0.25033656+0.00000000 = 0.25033656 loss_val= 0.25033656
Epoch: 3500/5000 loss_train: 0.25004117+0.00000000 = 0.25004117 loss_val= 0.25004117
Epoch: 4000/5000 loss_train: 0.25016469+0.00000000 = 0.25016469 loss_val= 0.25016469
Epoch: 4500/5000 loss_train: 0.25026793+0.00000000 = 0.25026793 loss_val= 0.25026793
Epoch: 5000/5000 loss_train: 0.24985866+0.00000000 = 0.24985866 loss_val= 0.24985866

Comment: Above we fit the same model to data with log-base-10 behavior (LOG-10) using different batch sizes. For these data, larger batch sizes help the algorithm fit better, since each update is computed over a larger amount of data held in memory. Training without mini-batches (the whole dataset at once) performed best, with loss_val at 0.25413410.

Question 06 - Build a fully connected neural network, then train and test it with and without learning_rate decay (choose one of the implemented methods). Then comment on the differences observed between a network with learning rate decay and one without (keep the same number of layers).

x, y = make_moons(200, noise=0.20)
y = y.reshape(-1, 1)

print(x.shape, y.shape)
plt.scatter(x[:,0], x[:,1], c=list(np.array(y).ravel()), s=15, cmap='bwr')

(200, 2) (200, 1)
<matplotlib.collections.PathCollection at 0x7efc4b92e350>

input_dim, output_dim = x.shape[1], y.shape[1]

### WITHOUT LEARNING RATE DECAY


nn = NeuralNetwork(cost_func=binary_cross_entropy, learning_rate=0.0001)
nn.layers.append(Layer(input_dim, 40, reg_strength=1, reg_func=l2_regularization, activation=relu))
nn.layers.append(Layer(40, 30, reg_strength=1, reg_func=l2_regularization, activation=relu))
nn.layers.append(Layer(30, output_dim, reg_strength=1, reg_func=l2_regularization, activation=sigmoid))

nn.fit(x, y, epochs=8000, verbose=300, batch_size=50)

y_pred = nn.predict(x)

print("Acc: {:.2f}%".format(100*accuracy_score(y, y_pred >= 0.5)))

plt.figure(figsize=(12, 6))

#plt.subplot(1, 2, 1)
classification_predictions_custom(x, y, is_binary=True, nn=nn, threshold=0.5, cmap='bwr', title="Sem lr_decay", acc=100*accuracy_score(y, y_pred >= 0.5), size=2, order=1)

### WITH LEARNING RATE DECAY


nn = NeuralNetwork(cost_func=binary_cross_entropy, learning_rate=0.0001, lr_decay_method=exponential_decay, lr_decay_rate=0.8, lr_decay_steps=100)
nn.layers.append(Layer(input_dim, 40, reg_strength=1, batch_norm=True, reg_func=l2_regularization, activation=relu))
nn.layers.append(Layer(40, 30, reg_strength=1, batch_norm=True, reg_func=l2_regularization, activation=relu))
nn.layers.append(Layer(30, output_dim, reg_strength=1, batch_norm=True, reg_func=l2_regularization, activation=sigmoid))

nn.fit(x, y, epochs=8000, verbose=300, batch_size=50)

y_pred = nn.predict(x)

print("Acc: {:.2f}%".format(100*accuracy_score(y, y_pred >= 0.5)))

#plt.subplot(1, 2, 2)
classification_predictions_custom(x, y, is_binary=True, nn=nn, threshold=0.5, cmap='bwr', title="Com lr_decay", acc=100*accuracy_score(y, y_pred >= 0.5), size=2, order=2)

Epoch: 0/8000 loss_train: 25.47607474+3.22633323 = 28.70240796 loss_val= 25.476074


Epoch: 300/8000 loss_train: 2.88262543+3.20307548 = 6.08570091 loss_val= 2.88262543
Epoch: 600/8000 loss_train: 1.82886776+3.18488268 = 5.01375044 loss_val= 1.82886776
Epoch: 900/8000 loss_train: 1.10358051+3.16808160 = 4.27166210 loss_val= 1.10358051
Epoch: 1200/8000 loss_train: 0.66120051+3.15221573 = 3.81341624 loss_val= 0.66120051
Epoch: 1500/8000 loss_train: 0.46333108+3.13679247 = 3.60012355 loss_val= 0.46333108
Epoch: 1800/8000 loss_train: 0.36132149+3.12154853 = 3.48287003 loss_val= 0.36132149
Epoch: 2100/8000 loss_train: 0.30069526+3.10644834 = 3.40714360 loss_val= 0.30069526
Epoch: 2400/8000 loss_train: 0.26199912+3.09146399 = 3.35346311 loss_val= 0.26199912
Epoch: 2700/8000 loss_train: 0.23710543+3.07659089 = 3.31369632 loss_val= 0.23710543
Epoch: 3000/8000 loss_train: 0.21921857+3.06181207 = 3.28103064 loss_val= 0.21921857
Epoch: 3300/8000 loss_train: 0.20541339+3.04711725 = 3.25253065 loss_val= 0.20541339
Epoch: 3600/8000 loss_train: 0.19424648+3.03250724 = 3.22675372 loss_val= 0.19424648
Epoch: 3900/8000 loss_train: 0.18494605+3.01798127 = 3.20292732 loss_val= 0.18494605
Epoch: 4200/8000 loss_train: 0.17704675+3.00353771 = 3.18058446 loss_val= 0.17704675
Epoch: 4500/8000 loss_train: 0.17053021+2.98918059 = 3.15971080 loss_val= 0.17053021
Epoch: 4800/8000 loss_train: 0.16506849+2.97490584 = 3.13997433 loss_val= 0.16506849
Epoch: 5100/8000 loss_train: 0.16027091+2.96070554 = 3.12097645 loss_val= 0.16027091
Epoch: 5400/8000 loss_train: 0.15591119+2.94657978 = 3.10249097 loss_val= 0.15591119
Epoch: 5700/8000 loss_train: 0.15182559+2.93251946 = 3.08434506 loss_val= 0.15182559
Epoch: 6000/8000 loss_train: 0.14801529+2.91853590 = 3.06655119 loss_val= 0.14801529
Epoch: 6300/8000 loss_train: 0.14444178+2.90462770 = 3.04906947 loss_val= 0.14444178
Epoch: 6600/8000 loss_train: 0.14102567+2.89079411 = 3.03181978 loss_val= 0.14102567
Epoch: 6900/8000 loss_train: 0.13766426+2.87703111 = 3.01469537 loss_val= 0.13766426
Epoch: 7200/8000 loss_train: 0.13405069+2.86333592 = 2.99738661 loss_val= 0.13405069
Epoch: 7500/8000 loss_train: 0.12986079+2.84971086 = 2.97957165 loss_val= 0.12986079
Epoch: 7800/8000 loss_train: 0.12259337+2.83616504 = 2.95875842 loss_val= 0.12259337
Acc: 97.00%
Epoch: 0/8000 loss_train: 14.26381144+3.06628956 = 17.33010100 loss_val= 14.263811
Epoch: 300/8000 loss_train: 0.71832257+3.06609333 = 3.78441589 loss_val= 0.71832257
Epoch: 600/8000 loss_train: 0.71832257+3.06609333 = 3.78441589 loss_val= 0.71832257
Epoch: 900/8000 loss_train: 0.71832257+3.06609333 = 3.78441589 loss_val= 0.71832257
Epoch: 1200/8000 loss_train: 0.71832257+3.06609333 = 3.78441589 loss_val= 0.71832257
Epoch: 1500/8000 loss_train: 0.71832257+3.06609333 = 3.78441589 loss_val= 0.71832257
Epoch: 1800/8000 loss_train: 0.71832257+3.06609333 = 3.78441589 loss_val= 0.71832257
Epoch: 2100/8000 loss_train: 0.71832257+3.06609333 = 3.78441589 loss_val= 0.71832257
Epoch: 2400/8000 loss_train: 0.71832257+3.06609333 = 3.78441589 loss_val= 0.71832257
Epoch: 2700/8000 loss_train: 0.71832257+3.06609333 = 3.78441589 loss_val= 0.71832257
Epoch: 3000/8000 loss_train: 0.71832257+3.06609333 = 3.78441589 loss_val= 0.71832257
Epoch: 3300/8000 loss_train: 0.71832257+3.06609333 = 3.78441589 loss_val= 0.71832257
Epoch: 3600/8000 loss_train: 0.71832257+3.06609333 = 3.78441589 loss_val= 0.71832257
Epoch: 3900/8000 loss_train: 0.71832257+3.06609333 = 3.78441589 loss_val= 0.71832257
Epoch: 4200/8000 loss_train: 0.71832257+3.06609333 = 3.78441589 loss_val= 0.71832257
Epoch: 4500/8000 loss_train: 0.71832257+3.06609333 = 3.78441589 loss_val= 0.71832257
Epoch: 4800/8000 loss_train: 0.71832257+3.06609333 = 3.78441589 loss_val= 0.71832257
Epoch: 5100/8000 loss_train: 0.71832257+3.06609333 = 3.78441589 loss_val= 0.71832257
Epoch: 5400/8000 loss_train: 0.71832257+3.06609333 = 3.78441589 loss_val= 0.71832257
Epoch: 5700/8000 loss_train: 0.71832257+3.06609333 = 3.78441589 loss_val= 0.71832257
Epoch: 6000/8000 loss_train: 0.71832257+3.06609333 = 3.78441589 loss_val= 0.71832257
Epoch: 6300/8000 loss_train: 0.71832257+3.06609333 = 3.78441589 loss_val= 0.71832257
Epoch: 6600/8000 loss_train: 0.71832257+3.06609333 = 3.78441589 loss_val= 0.71832257
Epoch: 6900/8000 loss_train: 0.71832257+3.06609333 = 3.78441589 loss_val= 0.71832257
Epoch: 7200/8000 loss_train: 0.71832257+3.06609333 = 3.78441589 loss_val= 0.71832257
Epoch: 7500/8000 loss_train: 0.71832257+3.06609333 = 3.78441589 loss_val= 0.71832257
Epoch: 7800/8000 loss_train: 0.71832257+3.06609333 = 3.78441589 loss_val= 0.71832257
Acc: 52.00%

Comment: Above we see the results of the algorithm without learning rate decay and with learning rate decay (lr_decay_method=exponential_decay, lr_decay_rate=0.8, lr_decay_steps=100). The network without lr_decay performed better, reaching 97% accuracy. With the exponential decay schedule, the learning rate shrank every 100 epochs, so the learning steps became smaller and smaller and the network ended up with a comparatively low accuracy (52.0%).
Question 07 - Build a fully connected neural network, then train and test it with and without batch normalization. Then comment on the differences observed between a network with batch normalization and one without (keep the same number of layers).
x, y = make_moons(1000, noise=0.20)
y = y.reshape(-1, 1)

print(x.shape, y.shape)
plt.scatter(x[:,0], x[:,1], c=list(np.array(y).ravel()), s=15, cmap='bwr')

(1000, 2) (1000, 1)
<matplotlib.collections.PathCollection at 0x7efc4c2efd60>

input_dim, output_dim = x.shape[1], y.shape[1]

### WITHOUT BATCH NORMALIZATION


nn = NeuralNetwork(cost_func=binary_cross_entropy, learning_rate=0.0001)
nn.layers.append(Layer(input_dim, 40, reg_strength=1, reg_func=l2_regularization, activation=relu))
nn.layers.append(Layer(40, 30, reg_strength=1, reg_func=l2_regularization, activation=relu))
nn.layers.append(Layer(30, output_dim, reg_strength=1, reg_func=l2_regularization, activation=sigmoid))

nn.fit(x, y, epochs=8000, verbose=300, batch_size=50)

y_pred = nn.predict(x)

print("Acc: {:.2f}%".format(100*accuracy_score(y, y_pred >= 0.5)))

plt.figure(figsize=(12, 6))

#plt.subplot(1, 2, 1)
classification_predictions_custom(x, y, is_binary=True, nn=nn, threshold=0.5, cmap='bwr', title="Sem batch norm", acc=100*accuracy_score(y, y_pred >= 0.5), size=2, order=1)

### WITH BATCH NORMALIZATION


nn = NeuralNetwork(cost_func=binary_cross_entropy, learning_rate=0.0001)
nn.layers.append(Layer(input_dim, 40, reg_strength=1, batch_norm=True, reg_func=l2_regularization, activation=relu))
nn.layers.append(Layer(40, 30, reg_strength=1, batch_norm=True, reg_func=l2_regularization, activation=relu))
nn.layers.append(Layer(30, output_dim, reg_strength=1, batch_norm=True, reg_func=l2_regularization, activation=sigmoid))

nn.fit(x, y, epochs=8000, verbose=300, batch_size=50)

y_pred = nn.predict(x)

print("Acc: {:.2f}%".format(100*accuracy_score(y, y_pred >= 0.5)))

#plt.subplot(1, 2, 2)
classification_predictions_custom(x, y, is_binary=True, nn=nn, threshold=0.5, cmap='bwr', title="Com batch norm", acc=100*accuracy_score(y, y_pred >= 0.5), size=2, order=2)

Epoch: 0/8000 loss_train: 25.96824015+0.61696085 = 26.58520100 loss_val= 25.968240


Epoch: 300/8000 loss_train: 0.17297018+0.60035420 = 0.77332439 loss_val= 0.17297018
Epoch: 600/8000 loss_train: 0.12485854+0.58613468 = 0.71099322 loss_val= 0.12485854
Epoch: 900/8000 loss_train: 0.10711783+0.57225992 = 0.67937776 loss_val= 0.10711783
Epoch: 1200/8000 loss_train: 0.09953210+0.55871564 = 0.65824775 loss_val= 0.09953210
Epoch: 1500/8000 loss_train: 0.09557977+0.54549013 = 0.64106990 loss_val= 0.09557977
Epoch: 1800/8000 loss_train: 0.09342722+0.53258432 = 0.62601155 loss_val= 0.09342722
Epoch: 2100/8000 loss_train: 0.09208815+0.51999164 = 0.61207979 loss_val= 0.09208815
Epoch: 2400/8000 loss_train: 0.09113359+0.50770182 = 0.59883540 loss_val= 0.09113359
Epoch: 2700/8000 loss_train: 0.09017207+0.49570394 = 0.58587602 loss_val= 0.09017207
Epoch: 3000/8000 loss_train: 0.08951275+0.48399677 = 0.57350952 loss_val= 0.08951275
Epoch: 3300/8000 loss_train: 0.08892111+0.47256781 = 0.56148891 loss_val= 0.08892111
Epoch: 3600/8000 loss_train: 0.08867047+0.46141730 = 0.55008777 loss_val= 0.08867047
Epoch: 3900/8000 loss_train: 0.08852338+0.45053241 = 0.53905579 loss_val= 0.08852338
Epoch: 4200/8000 loss_train: 0.08847066+0.43990564 = 0.52837630 loss_val= 0.08847066
Epoch: 4500/8000 loss_train: 0.08858284+0.42953709 = 0.51811994 loss_val= 0.08858284
Epoch: 4800/8000 loss_train: 0.08887165+0.41942145 = 0.50829310 loss_val= 0.08887165
Epoch: 5100/8000 loss_train: 0.08929666+0.40955275 = 0.49884941 loss_val= 0.08929666
Epoch: 5400/8000 loss_train: 0.08978303+0.39992047 = 0.48970350 loss_val= 0.08978303
Epoch: 5700/8000 loss_train: 0.09028572+0.39051876 = 0.48080448 loss_val= 0.09028572
Epoch: 6000/8000 loss_train: 0.09081080+0.38134156 = 0.47215236 loss_val= 0.09081080
Epoch: 6300/8000 loss_train: 0.09135580+0.37238392 = 0.46373972 loss_val= 0.09135580
Epoch: 6600/8000 loss_train: 0.09195210+0.36364185 = 0.45559395 loss_val= 0.09195210
Epoch: 6900/8000 loss_train: 0.09254090+0.35511019 = 0.44765109 loss_val= 0.09254090
Epoch: 7200/8000 loss_train: 0.09309933+0.34678206 = 0.43988140 loss_val= 0.09309933
Epoch: 7500/8000 loss_train: 0.09365160+0.33865345 = 0.43230505 loss_val= 0.09365160
Epoch: 7800/8000 loss_train: 0.09422063+0.33071882 = 0.42493946 loss_val= 0.09422063
Acc: 96.30%
Epoch: 0/8000 loss_train: 0.62629772+0.69157012 = 1.31786784 loss_val= 0.62629772
Epoch: 300/8000 loss_train: 0.45980148+0.67517001 = 1.13497149 loss_val= 0.45980148
Epoch: 600/8000 loss_train: 0.41041063+0.65915882 = 1.06956945 loss_val= 0.41041063
Epoch: 900/8000 loss_train: 0.37605275+0.64352732 = 1.01958007 loss_val= 0.37605275
Epoch: 1200/8000 loss_train: 0.35028864+0.62826651 = 0.97855515 loss_val= 0.35028864
Epoch: 1500/8000 loss_train: 0.32886998+0.61336760 = 0.94223759 loss_val= 0.32886998
Epoch: 1800/8000 loss_train: 0.31016802+0.59882201 = 0.90899003 loss_val= 0.31016802
Comentário: implementamos
Epoch: 2100/8000 acima uma
loss_train: classificação binária
0.29356214+0.58462136 = sobre os dados
0.87818350 de padrão
loss_val= 0.29356214
Epoch: 2400/8000 loss_train: 0.27871952+0.57075747 = 0.84947699 loss_val= 0.27871952
"moons", no intuito de separar as classes azul e vermelha. Para implementação sem batch
Epoch: 2700/8000 loss_train: 0.26537530+0.55722235 = 0.82259764 loss_val= 0.26537530
norm,Epoch:
durante3000/8000
o treinamento a descida
loss_train: do loss foi bem rápida= e0.79743083
0.25342263+0.54400820 se ajustou conseguindo uma
loss_val= 0.25342263
Epoch: 3300/8000 loss_train: 0.24263623+0.53110742 = 0.77374365 loss_val= 0.24263623
acurácia de 96.3%. Para implementação com batch normalization, a descida do loss aconteceu
Epoch: 3600/8000 loss_train: 0.23281881+0.51851257 = 0.75133139 loss_val= 0.23281881
mais Epoch:
devagar,3900/8000
tendo umloss_train:
esforço muito grande para atignir acurácia
0.22384225+0.50621641 de 96%.loss_val= 0.22384225
= 0.73005866
Epoch: 4200/8000 loss_train: 0.21571904+0.49421183 = 0.70993088 loss_val= 0.21571904
Epoch: 4500/8000 loss_train: 0.20823095+0.48249194 = 0.69072289 loss_val= 0.20823095
Questão 08 -4800/8000
Epoch: Construa uma rede neural
loss_train: totalmente conectada,
0.20143964+0.47104997 treine-a e teste-a
= 0.67248961 sem/com
loss_val= 0.20143964
Epoch: 5100/8000 loss_train: 0.19524344+0.45987935 = 0.65512278 loss_val= 0.19524344
dropout. Em seguida, comente o que foi possível observar de diferente entre uma rede com
Epoch: 5400/8000 loss_train: 0.18957107+0.44897363 = 0.63854469 loss_val= 0.18957107
dropout e outra
Epoch: sem (mantenha
5700/8000 a mesma
loss_train: quantidade de camadas).
0.18441441+0.43832653 = 0.62274094 loss_val= 0.18441441
Epoch: 6000/8000 loss_train: 0.17967908+0.42793192 = 0.60761099 loss_val= 0.17967908
Epoch: 6300/8000 loss_train: 0.17533301+0.41778381 = 0.59311682 loss_val= 0.17533301
Epoch: 6600/8000 loss_train: 0.17135316+0.40787635 = 0.57922951 loss_val= 0.17135316
Epoch: 6900/8000 loss_train: 0.16776170+0.39820385 = 0.56596555 loss_val= 0.16776170
Epoch: 7200/8000 loss_train: 0.16453167+0.38876072 = 0.55329239 loss_val= 0.16453167
Epoch: 7500/8000 loss_train: 0.16157077+0.37954153 = 0.54111230 loss_val= 0.16157077
Epoch: 7800/8000 loss_train: 0.15879522+0.37054097 = 0.52933619 loss_val= 0.15879522
Acc: 96.00%

x, y = make_blobs(n_samples=400, n_features=2, centers=[(-3, 0), (3, 0), (0, 3), (0, -3)])
y = y.reshape(-1, 1)

print(x.shape, y.shape)
plt.scatter(x[:,0], x[:,1], c=list(np.array(y).ravel()), s=15, cmap=plt.cm.viridis)

onehot = OneHotEncoder(sparse=False)
y_onehot = onehot.fit_transform(y)
print(y_onehot[::70])

(400, 2) (400, 1)
[[0. 0. 1. 0.]
[1. 0. 0. 0.]
[0. 0. 1. 0.]
[1. 0. 0. 0.]
[0. 0. 0. 1.]
[1. 0. 0. 0.]]
/usr/local/lib/python3.10/dist-packages/sklearn/preprocessing/_encoders.py:868: Futur
warnings.warn(

input_dim, output_dim = x.shape[1], y_onehot.shape[1]

plt.figure(figsize=(12, 6))

### WITHOUT DROPOUT


nn = NeuralNetwork(cost_func=softmax_neg_log_likelihood, learning_rate=1e-2)
nn.layers.append(Layer(input_dim, 2, activation=relu))
nn.layers.append(Layer(2, output_dim, activation=linear))

nn.fit(x, y_onehot, epochs=1000, verbose=100)

y_pred = np.argmax(nn.predict(x), axis=1)


acc = 100*accuracy_score(y, y_pred)
print('ACC: {:.2f}%'.format(acc))
classification_predictions_custom(x, y, is_binary=False, nn=nn, title="Sem dropout", size=2, order=1, acc=acc, cmap=plt.cm.viridis)

### WITH DROPOUT


nn = NeuralNetwork(cost_func=softmax_neg_log_likelihood, learning_rate=1e-2)
nn.layers.append(Layer(input_dim, 2, activation=relu, dropout_prob=0.5))
nn.layers.append(Layer(2, output_dim, activation=linear))

nn.fit(x, y_onehot, epochs=1000, verbose=100)

y_pred = np.argmax(nn.predict(x), axis=1)


acc = 100*accuracy_score(y, y_pred)
print('ACC: {:.2f}%'.format(acc))
classification_predictions_custom(x, y, is_binary=False, nn=nn, title="Com dropout", size=2, order=2, acc=acc, cmap=plt.cm.viridis)

Epoch: 0/1000 loss_train: 1.86551771+0.00000000 = 1.86551771 loss_val= 1.86551771


Epoch: 100/1000 loss_train: 1.09167541+0.00000000 = 1.09167541 loss_val= 1.09167541
Epoch: 200/1000 loss_train: 0.95179646+0.00000000 = 0.95179646 loss_val= 0.95179646
Epoch: 300/1000 loss_train: 0.85447373+0.00000000 = 0.85447373 loss_val= 0.85447373
Epoch: 400/1000 loss_train: 0.77024168+0.00000000 = 0.77024168 loss_val= 0.77024168
Epoch: 500/1000 loss_train: 0.70566493+0.00000000 = 0.70566493 loss_val= 0.70566493
Epoch: 600/1000 loss_train: 0.65715909+0.00000000 = 0.65715909 loss_val= 0.65715909
Epoch: 700/1000 loss_train: 0.61990636+0.00000000 = 0.61990636 loss_val= 0.61990636
Epoch: 800/1000 loss_train: 0.59058973+0.00000000 = 0.59058973 loss_val= 0.59058973
Epoch: 900/1000 loss_train: 0.56681055+0.00000000 = 0.56681055 loss_val= 0.56681055
Epoch: 1000/1000 loss_train: 0.54697437+0.00000000 = 0.54697437 loss_val= 0.54697437
ACC: 74.00%
Epoch: 0/1000 loss_train: 5.07378404+0.00000000 = 5.07378404 loss_val= 5.07378404
Epoch: 100/1000 loss_train: 1.21967738+0.00000000 = 1.21967738 loss_val= 1.21967738
Epoch: 200/1000 loss_train: 0.99009184+0.00000000 = 0.99009184 loss_val= 0.99009184
Epoch: 300/1000 loss_train: 0.89987875+0.00000000 = 0.89987875 loss_val= 0.89987875
Epoch: 400/1000 loss_train: 0.83831965+0.00000000 = 0.83831965 loss_val= 0.83831965
Epoch: 500/1000 loss_train: 0.79219128+0.00000000 = 0.79219128 loss_val= 0.79219128
Epoch: 600/1000 loss_train: 0.75749510+0.00000000 = 0.75749510 loss_val= 0.75749510
Epoch: 700/1000 loss_train: 0.72801190+0.00000000 = 0.72801190 loss_val= 0.72801190
Epoch: 800/1000 loss_train: 0.70434558+0.00000000 = 0.70434558 loss_val= 0.70434558
Epoch: 900/1000 loss_train: 0.68523924+0.00000000 = 0.68523924 loss_val= 0.68523924
Epoch: 1000/1000 loss_train: 0.66986808+0.00000000 = 0.66986808 loss_val= 0.66986808
ACC: 96.50%

Comment: In the implementation above, dropout (dropout_prob=0.5) helped the network learn by randomly deactivating neurons during training, which effectively thins the model at each step and led to a better fit, reaching 96.5% accuracy. The network without dropout settled its updates in fewer epochs, but at prediction time it performed worse than the dropout model, reaching only 74% accuracy.
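For reference, the sketch below shows the usual inverted-dropout mechanic that the dropout_prob argument of the Layer class is expected to implement (the actual Layer code is defined earlier in this notebook; the function and argument names here are illustrative assumptions, not copied from it):

import numpy as np

def dropout_forward(a, dropout_prob=0.5, training=True):
    # Inverted dropout: during training, zero each activation with
    # probability dropout_prob and rescale the survivors by 1/keep_prob
    # so the expected activation is unchanged; at inference, pass through.
    if not training or dropout_prob == 0.0:
        return a
    keep_prob = 1.0 - dropout_prob
    mask = (np.random.rand(*a.shape) < keep_prob) / keep_prob
    return a * mask

Because the mask changes at every training step, hidden units cannot co-adapt, which is the regularizing effect observed in the comparison above.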

Question 09 - Build a fully connected neural network, then train and test it without/with
momentum. Next, comment on what differences could be observed between the network with
momentum and the one without (keep the same number of layers).

x, y = make_classification(n_samples=100, n_classes=4, n_features=2, n_clusters_per_class


y = y.reshape(-1, 1)

print(x.shape, y.shape)
plt.scatter(x[:,0], x[:,1], c=list(np.array(y).ravel()), s=15, cmap=plt.cm.viridis)

onehot = OneHotEncoder(sparse=False)
y_onehot = onehot.fit_transform(y)


(100, 2) (100, 1)
/usr/local/lib/python3.10/dist-packages/sklearn/preprocessing/_encoders.py:868: Futur
warnings.warn(

input_dim, output_dim = x.shape[1], y_onehot.shape[1]

plt.figure(figsize=(12, 6))

#BUILD
nn = NeuralNetwork(cost_func=softmax_neg_log_likelihood, learning_rate=1e-2)
nn.layers.append(Layer(input_dim, 30,dropout_prob=0.7, activation=relu))
nn.layers.append(Layer(30, 20,dropout_prob=0.7, activation=relu))
nn.layers.append(Layer(20, output_dim, activation=linear))
#TRAIN
nn.fit(x, y_onehot, epochs=10000, verbose=1000)

#EVALUATION
y_pred = np.argmax(nn.predict(x), axis=1)

acc = 100*accuracy_score(y, y_pred)


print('ACC: {:.2f}%'.format(acc))
classification_predictions_custom(x, y, is_binary=False, nn=nn, title="Sem momentum", siz

#BUILD
nn = NeuralNetwork(cost_func=softmax_neg_log_likelihood, learning_rate=1e-2, momentum=0.4
nn.layers.append(Layer(input_dim, 30,dropout_prob=0.7, activation=relu))
nn.layers.append(Layer(30, 20,dropout_prob=0.7, activation=relu))
nn.layers.append(Layer(20, output_dim, activation=linear))
#TRAIN
nn.fit(x, y_onehot, epochs=10000, verbose=1000)

#EVALUATION
y_pred = np.argmax(nn.predict(x), axis=1)

acc = 100*accuracy_score(y, y_pred)


print('ACC: {:.2f}%'.format(acc))
classification_predictions_custom(x, y, is_binary=False, nn=nn, title="Com momentum", siz


Epoch: 0/10000 loss_train: 33.06136768+0.00000000 = 33.06136768 loss_val= 33.06136


Epoch: 1000/10000 loss_train: 1.07590302+0.00000000 = 1.07590302 loss_val= 1.07590302
Epoch: 2000/10000 loss_train: 0.96272602+0.00000000 = 0.96272602 loss_val= 0.96272602
Epoch: 3000/10000 loss_train: 0.92872405+0.00000000 = 0.92872405 loss_val= 0.92872405
Epoch: 4000/10000 loss_train: 0.86466803+0.00000000 = 0.86466803 loss_val= 0.86466803
Epoch: 5000/10000 loss_train: 0.82424221+0.00000000 = 0.82424221 loss_val= 0.82424221
Epoch: 6000/10000 loss_train: 0.78819557+0.00000000 = 0.78819557 loss_val= 0.78819557
Epoch: 7000/10000 loss_train: 0.77902480+0.00000000 = 0.77902480 loss_val= 0.77902480
Epoch: 8000/10000 loss_train: 0.67697661+0.00000000 = 0.67697661 loss_val= 0.67697661
Epoch: 9000/10000 loss_train: 0.63596905+0.00000000 = 0.63596905 loss_val= 0.63596905
Epoch: 10000/10000 loss_train: 0.64280732+0.00000000 = 0.64280732 loss_val= 0.6428073
ACC: 88.00%
Epoch: 0/10000 loss_train: 31.38938774+0.00000000 = 31.38938774 loss_val= 31.38938
Epoch: 1000/10000 loss_train: 1.00549334+0.00000000 = 1.00549334 loss_val= 1.00549334
Epoch: 2000/10000 loss_train: 0.94024104+0.00000000 = 0.94024104 loss_val= 0.94024104
Epoch: 3000/10000 loss_train: 0.75537479+0.00000000 = 0.75537479 loss_val= 0.75537479
Epoch: 4000/10000 loss_train: 0.64211475+0.00000000 = 0.64211475 loss_val= 0.64211475
Epoch: 5000/10000 loss_train: 0.62630109+0.00000000 = 0.62630109 loss_val= 0.62630109
Epoch: 6000/10000 loss_train: 0.56923602+0.00000000 = 0.56923602 loss_val= 0.56923602
Epoch: 7000/10000 loss_train: 0.50291955+0.00000000 = 0.50291955 loss_val= 0.50291955
Epoch: 8000/10000 loss_train: 0.47941037+0.00000000 = 0.47941037 loss_val= 0.47941037
Epoch: 9000/10000 loss_train: 0.44504345+0.00000000 = 0.44504345 loss_val= 0.44504345
Epoch: 10000/10000 loss_train: 0.41008435+0.00000000 = 0.41008435 loss_val= 0.4100843
ACC: 92.00%

Comment: The results above compare the multi-class classification model trained with and without momentum. The model with momentum (0.4) performed better, reaching 92% accuracy, while the model without momentum reached 88%. Momentum adds a fraction of the previous weight update to the current gradient step, acting as a "push" along the descent direction of the error surface and helping the network converge faster.
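A minimal sketch of the classical (heavy-ball) momentum update assumed here, using the same hyperparameters as above (learning_rate=1e-2, momentum=0.4); the real update lives inside the NeuralNetwork class defined earlier in the notebook, so the names below are illustrative:

import numpy as np

def sgd_momentum_step(w, grad_w, v, learning_rate=1e-2, momentum=0.4):
    # The velocity v keeps a decaying memory of past gradients:
    # consistent directions accelerate, oscillating ones cancel out.
    v = momentum * v - learning_rate * grad_w
    return w + v, v

# Illustrative usage inside a training loop, with v initialized to zeros:
w = np.zeros((2, 3)); v = np.zeros_like(w)
grad_w = np.ones((2, 3))
w, v = sgd_momentum_step(w, grad_w, v)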

Question 10 - Build a fully connected neural network, then train and test it without/with
early stopping. Next, comment on the impact caused by early stopping and its usefulness.

x, y = make_classification(n_samples=1000, n_classes=4, n_features=2, n_clusters_per_clas


y = y.reshape(-1, 1)

print(x.shape, y.shape)
plt.scatter(x[:,0], x[:,1], c=list(np.array(y).ravel()), s=15, cmap=plt.cm.viridis)

(1000, 2) (1000, 1)
<matplotlib.collections.PathCollection at 0x7fbcc84d2ce0>

onehot = OneHotEncoder(sparse=False)
y_onehot = onehot.fit_transform(y)

/usr/local/lib/python3.10/dist-packages/sklearn/preprocessing/_encoders.py:868: Futur
warnings.warn(

keyboard_arrow_down Neural Network without Early Stopping


input_dim, output_dim = x.shape[1], y_onehot.shape[1]

#BUILD
nn = NeuralNetwork(cost_func=softmax_neg_log_likelihood, learning_rate=1e-2, momentum=0.2
nn.layers.append(Layer(input_dim, 30,dropout_prob=0.7, activation=tanh))
nn.layers.append(Layer(30, 20,dropout_prob=0.7, activation=relu))
nn.layers.append(Layer(20, output_dim, activation=linear))
#TRAIN
nn.fit(x, y_onehot, epochs=10000, verbose=1000)

#EVALUATION
y_pred = np.argmax(nn.predict(x), axis=1)

print('ACC: {:.2f}%'.format(100*accuracy_score(y, y_pred)))


plot.classification_predictions(x, y, is_binary=False, nn=nn)

Epoch: 0/10000 loss_train: 5.35941271+0.00000000 = 5.35941271 loss_val= 5.35941271


Epoch: 1000/10000 loss_train: 0.92565626+0.00000000 = 0.92565626 loss_val= 0.92565626
Epoch: 2000/10000 loss_train: 0.86972421+0.00000000 = 0.86972421 loss_val= 0.86972421
Epoch: 3000/10000 loss_train: 0.79580998+0.00000000 = 0.79580998 loss_val= 0.79580998
Epoch: 4000/10000 loss_train: 0.75064196+0.00000000 = 0.75064196 loss_val= 0.75064196
Epoch: 5000/10000 loss_train: 0.72430004+0.00000000 = 0.72430004 loss_val= 0.72430004
Epoch: 6000/10000 loss_train: 0.67863232+0.00000000 = 0.67863232 loss_val= 0.67863232
Epoch: 7000/10000 loss_train: 0.65741012+0.00000000 = 0.65741012 loss_val= 0.65741012
Epoch: 8000/10000 loss_train: 0.63531615+0.00000000 = 0.63531615 loss_val= 0.63531615
Epoch: 9000/10000 loss_train: 0.62294920+0.00000000 = 0.62294920 loss_val= 0.62294920
Epoch: 10000/10000 loss_train: 0.60746796+0.00000000 = 0.60746796 loss_val= 0.6074679
ACC: 85.80%

keyboard_arrow_down Neural Network with Early Stopping


input_dim, output_dim = x.shape[1], y_onehot.shape[1]

#BUILD
nn = NeuralNetwork(cost_func=softmax_neg_log_likelihood, learning_rate=1e-2, momentum=0.2
nn.layers.append(Layer(input_dim, 30,dropout_prob=0.7, activation=tanh))
nn.layers.append(Layer(30, 20,dropout_prob=0.7, activation=relu))
nn.layers.append(Layer(20, output_dim, activation=linear))
#TRAIN
nn.fit(x, y_onehot, epochs=10000, verbose=1000)

#EVALUATION
y_pred = np.argmax(nn.predict(x), axis=1)

print('ACC: {:.2f}%'.format(100*accuracy_score(y, y_pred)))


plot.classification_predictions(x, y, is_binary=False, nn=nn)

Epoch: 0/10000 loss_train: 9.31093622+0.00000000 = 9.31093622 loss_val= 9.31093622


Epoch: 1000/10000 loss_train: 0.88888518+0.00000000 = 0.88888518 loss_val= 0.88888518
Epoch: 2000/10000 loss_train: 0.80772905+0.00000000 = 0.80772905 loss_val= 0.80772905
Early Stopping at 2151 epoch
ACC: 85.60%
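The fit call that prints "Early Stopping at 2151 epoch" is cut off above, so the exact criterion is not visible; the sketch below shows the patience-based rule such a check typically implements (the names patience, min_delta and the state dict are assumptions, not taken from the notebook). In this run the check halts training at epoch 2151 with essentially the same accuracy as the full 10000-epoch run (85.60% vs 85.80%), which is the practical benefit of early stopping: it avoids wasted epochs once the validation loss stops improving.

def should_stop(val_loss, state, patience=100, min_delta=1e-4):
    # Track the best validation loss seen so far; stop once it has not
    # improved by at least min_delta for `patience` consecutive checks.
    if val_loss < state['best_loss'] - min_delta:
        state['best_loss'] = val_loss
        state['wait'] = 0
    else:
        state['wait'] += 1
    return state['wait'] >= patience

# Illustrative usage inside the training loop:
# state = {'best_loss': float('inf'), 'wait': 0}
# if should_stop(current_val_loss, state): break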

