que a menudo utilizo los datos de EUROSTAT y que era bastante molesto que los datos no se pudieran cargar directamente en R. He escrito este fragmento para obtener cualquier conjunto de datos proporcionado por el servicio de descarga masiva de EUROSTAT: http://epp.eurostat.ec.europa.eu/NavTree_prod/everybody/BulkDownloadListing?sort=1&dir=dic%2Fen ¿Es esta la solución para obtener datos de EUROSTAT en R?
¿Hay una mejor manera? Esto es lo que me funcionó a mí:
#this library is used to download data from eurostat and to find datasets
#later: extend to find datasets with certain dimensions
#download data from eurostat
#unpack and convert to dataframe
#load label descriptions
#load factors
#save as r data object
# Configuration: code of the Eurostat dataset to fetch and the language used
# for the label dictionaries downloaded further down.
datasetname <- "ebd_all"
LANGUAGE <- "en"

# Install RCurl only when it is missing, so that re-running the script does
# not hit CRAN (and reinstall the package) on every execution.
if (!requireNamespace("RCurl", quietly = TRUE)) {
  install.packages("RCurl")
}
library(RCurl)
library(data.table)
# reshape is attached after data.table, so melt()/colsplit() used below
# resolve to the reshape versions.
library(reshape)
library(stringr)
# Download the gzip-compressed TSV for `datasetname` into a temporary file
# and read it as a tab-delimited table.
baseurl <- "http://epp.eurostat.ec.europa.eu/NavTree_prod/everybody/BulkDownloadListing?sort=1&file=data%2F"
fullfilename <- paste0(datasetname, ".tsv.gz")
temp <- paste0(tempfile(), ".gz")
download.file(paste0(baseurl, fullfilename), temp)
dataconnection <- gzfile(temp)
d <- read.delim(dataconnection)

# Reshape to long format: the first column is kept as the identifier and
# every other column becomes a (time, value) pair.
firstname <- colnames(d)[1]
longdata <- melt(d, id.vars = firstname)
colnames(longdata) <- c("dimensions", "time", "value")

# The name of the first column lists the dimension codes separated by dots
# and ends in ".time"; cut that suffix off and split the rest into headings.
firstname <- substr(firstname, 1, nchar(firstname) - nchar(".time"))
headings <- toupper(strsplit(firstname, ".", fixed = TRUE)[[1]])
headingcount <- length(headings)
# Break the comma-separated key in `longdata$dimensions` into one column per
# heading and keep the result as a data.table.
df <- data.frame(dimensions = as.character(longdata[["dimensions"]]))
# transform() stores the data frame returned by colsplit() as a nested column
# named `dimensions`; it is pulled out again on the next line.
df <- transform(
  df,
  dimensions = colsplit(dimensions, split = "\\,", names = headings)
)
dimensions <- data.table(df[["dimensions"]])
# Download the dictionary of dimension names (code -> description) for the
# configured language.
dimfile <- paste0(
  "http://epp.eurostat.ec.europa.eu/NavTree_prod/everybody/BulkDownloadListing?sort=1&file=dic%2F",
  LANGUAGE,
  "%2Fdimlst.dic"
)
temp <- paste0(tempfile(), ".gz")
download.file(dimfile, temp)
dataconnection <- gzfile(temp)
dimdata <- read.delim(dataconnection, header = FALSE)
colnames(dimdata) <- c("colname", "desc")

# Lookup vector: descriptions named by their dimension code.
lab <- setNames(dimdata$desc, dimdata$colname)

# Descriptive column titles, one per heading, in the order of `headings`.
speakingheadings <- as.character(lab[headings])
# For every heading, download its code dictionary and add a "<HEADING>_desc"
# column holding the description of each code.
#
# The lookup is done with match() instead of merge(): merging data.tables
# sorts the result by the key column and moves that column to the front, which
# would break the row-by-row correspondence with `longdata` (time and value are
# bound to `dimensions` later purely by position) and would also shuffle the
# column order that the renaming step below relies on.
for (heading in headings) {
  factorfile <- paste0(
    "http://epp.eurostat.ec.europa.eu/NavTree_prod/everybody/BulkDownloadListing?sort=1&file=dic%2F",
    LANGUAGE,
    "%2F",
    tolower(heading),
    ".dic"
  )
  temp <- paste0(tempfile(), ".gz")
  download.file(factorfile, temp)
  dataconnection <- gzfile(temp)
  factordata <- read.delim(dataconnection, header = FALSE)
  desc_column <- paste0(heading, "_desc")
  colnames(factordata) <- c(heading, desc_column)

  # Position of each row's code inside the dictionary; codes that are not
  # listed yield NA, mirroring the all.x = TRUE behaviour of a left join.
  code_position <- match(
    as.character(dimensions[[heading]]),
    as.character(factordata[[heading]])
  )
  set(dimensions, j = desc_column, value = factordata[[desc_column]][code_position])
}

# Rename the code columns to their descriptive titles. Columns are addressed by
# name, not position, and a heading without a known description keeps its code.
column_titles <- ifelse(is.na(speakingheadings), headings, speakingheadings)
setnames(dimensions, headings, column_titles)
# Split every raw cell into a numeric value and an optional flag. A cell is
# expected to look like "<value>", "<value> <flag>" or ":" (with or without a
# flag) -- NOTE(review): ":" is treated as the missing-value marker; confirm
# against the Eurostat file format.
raw_cells <- trimws(as.character(longdata$value))

# Everything before the first space is the value, whatever follows is the flag.
value_text <- sub(" .*$", "", raw_cells)
flag_text <- trimws(sub("^[^ ]*", "", raw_cells))
flag_text[!nzchar(flag_text)] <- NA_character_

# Map the missing marker to NA up front so that as.double() only warns about
# genuinely unexpected content.
value_text[value_text %in% ":"] <- NA_character_

values <- data.table(value = as.double(value_text), flag = flag_text)

# Bind dimensions, time and values column-wise; rows are matched by position.
eurostatdata <- cbind(dimensions, time = longdata$time, values)

# paste0() avoids the stray space that paste()'s default separator would put
# between the dataset name and the extension ("ebd_all .RData").
save(eurostatdata, file = paste0(datasetname, ".RData"))
Un paquete de R llamado [eurostat](https://cran.r-project.org/web/packages/eurostat/index.html) fue publicado en 2015. Una buena [viñeta](https://github.com/rOpenGov/eurostat/blob/master/vignettes/2015-RJournal/lahti-huovari-kainu-biecek.md) proporciona ejemplos e instrucciones para descargar y visualizar los datos de Eurostat. El autor del paquete [SmarterPoland](https://cran.r-project.org/web/packages/SmarterPoland/index.html), mencionado a continuación, es uno de los coautores del paquete eurostat. –