Você está na página 1 de 3

# Package setup and data loading ----

# Reserve more memory for the Java VM used by the Java-backed Excel packages.
# This must run before those packages are attached, because the JVM reads
# `java.parameters` only when it is first initialised.
options(java.parameters = "-Xmx2048m")

library(dplyr)
library(tidyr)
library(lubridate)
library(stringr)

# NOTE(review): more than one of these packages exports `read.xlsx()`; the one
# attached last masks the others. Confirm which reader is intended.
library(xlsx)
library(XLConnect)
library(openxlsx)

# Read the first three sheets of the PNUD workbook, one object per sheet.
res1 <- read.xlsx("~/CCER/aula05/pnud.xlsx", 1)
res2 <- read.xlsx("~/CCER/aula05/pnud.xlsx", 2)
res3 <- read.xlsx("~/CCER/aula05/pnud.xlsx", 3)

# TBL_DF ----

# Wrap sheets 2 and 3 with tbl_df(); sheet 2 is printed for a quick look.
pnud_muni <- tbl_df(res2)
pnud_muni
pnud_uf <- tbl_df(res3)

# SELECT ----

# by index (not recommended!)
pnud_muni %>%
  select(1:10)

# by column name
pnud_muni %>%
  select(ano, ufn, municipio, idhm)

# ranges and helper functions (to save typing)
pnud_muni %>%
  select(ano:municipio, ufn, starts_with("idhm"))

pnud_muni %>%
  select(municipio, ufn, ano, gini, starts_with("idhm"))

# FILTER ----

# NOTE(review): the accented characters in the string literals below were
# restored by hand; confirm they match the spelling used in the data.

# only the state of São Paulo, with municipal HDI above 80% in 2010
pnud_muni %>%
  select(ano, ufn, municipio, idhm) %>%
  filter(ufn == "São Paulo", idhm > .8, ano == 2010)

# same as above, combining the conditions explicitly with `&`
pnud_muni %>%
  select(ano, ufn, municipio, idhm) %>%
  filter(ufn == "São Paulo" & idhm > .8 & ano == 2010)

# !is.na(x): keep only the rows where `pea` is not missing
pnud_muni %>%
  select(ano, ufn, municipio, idhm, pea) %>%
  filter(!is.na(pea))

# %in%: keep the rows whose value belongs to a set
pnud_muni %>%
  select(ano, ufn, municipio, idhm) %>%
  filter(municipio %in% c("CAMPINAS", "SÃO PAULO"))
1

# MUTATE ----

# add the HDI as a percentage, both as a number and as text
pnud_muni %>%
  select(ano, ufn, municipio, idhm) %>%
  filter(ano == 2010) %>%
  mutate(
    idhm_porc = idhm * 100,
    idhm_porc_txt = paste(idhm_porc, "%")
  )

# row-by-row mean of idhm_l and idhm_e
pnud_muni %>%
  select(ano, ufn, municipio, starts_with("idhm")) %>%
  filter(ano == 2010) %>%
  mutate(idhm2 = (idhm_e + idhm_l) / 2)

# wrong (kept on purpose as a counter-example): mean() is evaluated once over
# both whole columns, so every row receives the same overall mean
pnud_muni %>%
  select(ano, starts_with("idhm")) %>%
  filter(ano == 2010) %>%
  mutate(idhm2 = mean(c(idhm_e, idhm_l)))

# an alternative (slower): evaluate mean() one row at a time
pnud_muni %>%
  select(ano, ufn, municipio, starts_with("idhm")) %>%
  filter(ano == 2010) %>%
  rowwise() %>%
  mutate(idhm2 = mean(c(idhm_e, idhm_l)))

pnud_muni %>%
  select(ano, starts_with("idhm")) %>%
  filter(ano == 2010) %>%
  mutate(idh_porc = paste(idhm * 100, "%"))

# ARRANGE ----

# ascending order of idhm
pnud_muni %>%
  select(ano, ufn, municipio, idhm) %>%
  filter(ano == 2010) %>%
  mutate(
    idhm_porc = idhm * 100,
    idhm_porc_txt = paste(idhm_porc, "%")
  ) %>%
  arrange(idhm)

# descending order of idhm
pnud_muni %>%
  select(ano, ufn, municipio, idhm) %>%
  filter(ano == 2010) %>%
  mutate(
    idhm_porc = idhm * 100,
    idhm_porc_txt = paste(idhm_porc, "%")
  ) %>%
  arrange(desc(idhm))

# SUMMARISE ----

# per state in 2010: row count, mean idhm and total of popt,
# sorted by mean idhm from highest to lowest
pnud_muni %>%
  filter(ano == 2010) %>%
  group_by(ufn) %>%
  summarise(
    n = n(),
    idhm_medio = mean(idhm),
    populacao_total = sum(popt)
  ) %>%
  arrange(desc(idhm_medio))

# count() as a shortcut for group_by() + tally()
pnud_muni %>%
  filter(ano == 2010) %>%
  count(ufn)

pnud_muni %>%
  group_by(ano, ufn) %>%
  tally() %>%
  head # no parentheses needed after %>%

# SPREAD ----

# total popt per state, with one column per year
pnud_muni %>%
  group_by(ano, ufn) %>%
  summarise(populacao = sum(popt)) %>%
  ungroup() %>%
  spread(ano, populacao)

# GATHER ----

# stack the idhm_* columns into key/value pairs (tipo_idh, idh)
pnud_muni %>%
  filter(ano == 2010) %>%
  select(ufn, municipio, starts_with("idhm_")) %>%
  gather(tipo_idh, idh, starts_with("idhm_"))

# UNITE ----

# paste municipio and ufn into a single column, separated by "_"
pnud_muni %>%
  select(municipio, ufn, ano) %>%
  unite(municipio_uf, municipio, ufn, sep = "_")

# SEPARATE ----

# split the gathered column name at "_" and discard the first piece
pnud_muni %>%
  select(municipio, ufn, ano, starts_with("idhm_")) %>%
  filter(ano == 2010) %>%
  gather(tipo_idh, idh, starts_with("idhm_")) %>%
  separate(tipo_idh, c("nada", "tipo_idh"), sep = "_") %>%
  select(-nada)
natalia.de.siqueira@hotmail.com

# JOIN ----

# One row per (ufn, ano) from the municipal table, joined to the state table.
# `.keep_all = TRUE` keeps the remaining columns after distinct(); without it
# current dplyr returns only `ufn` and `ano`, and the select() that follows
# fails because `municipio` and the idhm columns are gone.
dados <- pnud_muni %>%
  distinct(ufn, ano, .keep_all = TRUE) %>%
  select(ufn, ano, municipio, starts_with("idhm")) %>%
  inner_join(pnud_uf, c("ufn", "ano")) %>%
  select(ufn, ano, municipio, starts_with("idhm"))

STR_DETECT____________________________________
library(stringr) pnud_muni %>% filter(ano==2010, str_detect(municipio, SO P)) %>% select(municipio,
ano, ufn, idhm)

# DMY/YMD/MDY ----
library(lubridate)

# Build a "year-01-01" string from `ano` and parse it as a date with ymd().
temp <- pnud_muni %>%
  select(ufn, municipio, idhm, ano) %>%
  mutate(aux1 = "01", aux2 = "01") %>%
  unite(ano_date, ano, aux1, aux2, sep = "-") %>%
  mutate(ano_date = ymd(ano_date))

str(temp)
rm(temp)

Você também pode gostar