我们身处数据时代,我们倡导数字经济,我们都是数据人。因此,我们合理分析数据以发现有意义的洞见就显得尤为重要。
数据可视化分析是一个重要的工具,了解数据可视化分析怎么做,可以让你从数据中挖掘出关键的洞见。如果你的分析结果能够恰当地可视化,就可以与利益相关者进行有效的沟通。
R语言的可视化包
1 graphics包,R语言base包之一,用于绘制基础的可视化图形,例如:散点图、盒箱图等。
2 lattice包,它在graphics包的基础上做了改进和拓新,适用于多变量数据可视化分析。
3 ggplot2包,它基于Grammar of Graphics(图形语法)的原理设计,采用图层叠加的方式来创建各种复杂的可视化图形。
4 plotly包,它利用开源JavaScript图形库创建可交互式的Web图形。
等等
如何用R语言实现常用的数据可视化分析
数据准备,选择了R语言自带的2个数据集,分别如下:
1 airquality:1973年5月到9月纽约每日的空气质量测量数据
2 mtcars:Motor Trend Car Road Tests(《Motor Trend》杂志的汽车道路测试数据)
加载R包和数据集
参考代码
# Load the plotting packages through pacman
library(pacman)
p_load(lattice, ggplot2, plotly)
# Load the two built-in datasets used by the examples
data("airquality")
data("mtcars")
数据可视化
1 graphics包画图
1.1 散点图
参考代码
# 1.1 Scatter plot of Ozone (x) against Wind (y)
plot(x = airquality$Ozone, y = airquality$Wind)
结论:
Wind与Ozone具有一定的负相关性。
1.2 散点图矩阵
参考代码
# 1.2 Scatter plot matrix built from the whole airquality data frame
plot(x = airquality)
1.3 点线图
参考代码
# 1.3 Point-and-line plot: type = "b" draws both points and connecting lines
plot(airquality$Ozone, type = "b")
1.4 图形添加标签和标题
参考代码
# 1.4 Add axis labels, a main title and a point colour
plot(airquality$Ozone,
     xlab = "观察的index",
     ylab = "臭氧的浓度",
     main = "纽约的臭氧水平",
     col = "green")
1.5 柱状图
参考代码
# 1.5 Bar plot of the Ozone values, drawn with vertical bars
barplot(airquality$Ozone,
        main = "空气中臭氧的浓度",
        xlab = "ozone levels",
        col = "red",
        horiz = FALSE)
1.6 直方图
参考代码
# 1.6 Histogram of the Solar.R column
hist(airquality$Solar.R,
     main = "空气中太阳辐射度的值",
     xlab = "Solar rad.",
     col = "red")
1.7 盒箱图
参考代码
# 1.7 Box plots of the first four columns of airquality
# R indexes from 1; a 0 in a column index is silently dropped, so 1:4 states
# the intended selection explicitly.
boxplot(airquality[, 1:4],
        main = "多变量盒箱图")
1.8 多图组合展示
参考代码
# 1.8 Several plots combined on one page
# par() returns the previous settings; keep them so the 3 x 3 layout can be
# undone once the panels are drawn.
old_par <- par(mfrow = c(3, 3), mar = c(2, 5, 2, 1), las = 1, bty = "n")
plot(airquality$Ozone)
plot(airquality$Ozone, airquality$Wind)
plot(airquality$Ozone, type = "c")
plot(airquality$Ozone, type = "s")
plot(airquality$Ozone, type = "h")
barplot(airquality$Ozone,
        main = "Ozone Concentration in air",
        xlab = "ozone levels",
        col = "red",
        horiz = TRUE)
hist(airquality$Solar.R)
boxplot(airquality$Solar.R)
# Columns 1 to 4; a 0 in the index would be silently ignored
boxplot(airquality[, 1:4],
        main = "多变量盒箱图")
# Restore the graphics parameters that were active before this section
par(old_par)
2 lattice包画图
2.1 散点图矩阵
参考代码
# 2.1 Scatter plot matrix of mtcars columns 1, 3, 4, 5 and 6
splom(mtcars[c(1, 3, 4, 5, 6)], main = "MTCARS Data")
2.2 两个因子约束下的散点图
参考代码
# 2.2 Scatter plots conditioned on two factors
# Turn gear and cyl into labelled factors to use as conditioning variables
gear_factor <- factor(mtcars$gear, levels = c(3, 4, 5),
                      labels = c("3gears", "4gears", "5gears"))
cyl_factor <- factor(mtcars$cyl, levels = c(4, 6, 8),
                     labels = c("4cyl", "6cyl", "8cyl"))
# One mpg ~ wt panel per combination of cyl_factor and gear_factor
xyplot(mtcars$mpg ~ mtcars$wt | cyl_factor * gear_factor,
       main = "Scatterplots : Cylinders and Gears",
       ylab = "Miles/Gallon",
       xlab = "Weight of Car")
3 ggplot2包画图
3.1 散点图
参考代码
# 3.1 Scatter plot of mpg against wt, drawn with the classic theme
ggplot(mtcars, aes(wt, mpg)) +
  geom_point() +
  theme_classic()
3.2 图形修饰
参考代码
# 3.2 Decorated scatter plot: point colour mapped to cyl (as a factor),
# point size mapped to qsec
ggplot(mtcars, aes(wt, mpg, colour = as.factor(cyl), size = qsec)) +
  geom_point() +
  theme_classic()
4 plotly包画图
4.1 散点图
参考代码
# 4.1 Interactive scatter plot of wt against hp
# The trace type and mode are given explicitly so plotly does not have to
# guess them.
p <- plot_ly(data = mtcars, x = ~hp, y = ~wt,
             type = "scatter", mode = "markers")
p
4.2 多直线图
参考代码
# 4.2 Several traces on one plot
# Three random series of 100 normal draws centred on 10, 0 and -10
data1 <- rnorm(100, mean = 10)
data2 <- rnorm(100, mean = 0)
data3 <- rnorm(100, mean = -10)
x <- seq_len(100)
data <- data.frame(x, data1, data2, data3)
# Each series is added as its own scatter trace with a different drawing mode
p <- plot_ly(data, x = ~x) %>%
  add_trace(y = ~data1, name = "data1", type = "scatter", mode = "lines") %>%
  add_trace(y = ~data2, name = "data2", type = "scatter",
            mode = "lines+markers") %>%
  add_trace(y = ~data3, name = "data3", type = "scatter", mode = "markers")
p
附录:完整参考代码
####################
# Data visualization analysis with R
####################
# Load the plotting packages through pacman
library(pacman)
p_load(lattice, ggplot2, plotly)
# Load the two built-in datasets used by the examples
data("airquality")
data("mtcars")
# 1 Plotting with the graphics package
# 1.1 Scatter plot
plot(airquality$Ozone, airquality$Wind)
# 1.2 Scatter plot matrix
plot(airquality)
# 1.3 Point-and-line plot
plot(airquality$Ozone, type = "b")
# 1.4 Add axis labels, a main title and a point colour
plot(airquality$Ozone,
     xlab = "观察的index",
     ylab = "臭氧的浓度",
     main = "纽约的臭氧水平",
     col = "green")
# 1.5 Bar plot
barplot(airquality$Ozone,
        main = "空气中臭氧的浓度",
        xlab = "ozone levels",
        col = "red",
        horiz = FALSE)
# 1.6 Histogram
hist(airquality$Solar.R,
     main = "空气中太阳辐射度的值",
     xlab = "Solar rad.",
     col = "red")
# 1.7 Box plots of the first four columns
# (R indexes from 1; a 0 in a column index is silently dropped)
boxplot(airquality[, 1:4],
        main = "多变量盒箱图")
# 1.8 Several plots combined on one page
# par() returns the previous settings; keep them so the 3 x 3 layout can be
# undone once the panels are drawn.
old_par <- par(mfrow = c(3, 3), mar = c(2, 5, 2, 1), las = 1, bty = "n")
plot(airquality$Ozone)
plot(airquality$Ozone, airquality$Wind)
plot(airquality$Ozone, type = "c")
plot(airquality$Ozone, type = "s")
plot(airquality$Ozone, type = "h")
barplot(airquality$Ozone,
        main = "Ozone Concentration in air",
        xlab = "ozone levels",
        col = "red",
        horiz = TRUE)
hist(airquality$Solar.R)
boxplot(airquality$Solar.R)
# Columns 1 to 4; a 0 in the index would be silently ignored
boxplot(airquality[, 1:4],
        main = "多变量盒箱图")
# Restore the graphics parameters that were active before this section
par(old_par)
# 2 Plotting with the lattice package
# 2.1 Scatter plot matrix of mtcars columns 1, 3, 4, 5 and 6
splom(mtcars[c(1, 3, 4, 5, 6)], main = "MTCARS Data")
# 2.2 Scatter plots for each combination of two factors
# Turn gear and cyl into labelled factors to use as conditioning variables
gear_factor <- factor(mtcars$gear, levels = c(3, 4, 5),
                      labels = c("3gears", "4gears", "5gears"))
cyl_factor <- factor(mtcars$cyl, levels = c(4, 6, 8),
                     labels = c("4cyl", "6cyl", "8cyl"))
# One mpg ~ wt panel per combination of cyl_factor and gear_factor
xyplot(mtcars$mpg ~ mtcars$wt | cyl_factor * gear_factor,
       main = "Scatterplots : Cylinders and Gears",
       ylab = "Miles/Gallon",
       xlab = "Weight of Car")
# 3 Plotting with the ggplot2 package
# 3.1 Scatter plot of mpg against wt, drawn with the classic theme
ggplot(mtcars, aes(wt, mpg)) +
  geom_point() +
  theme_classic()
# 3.2 Decorated scatter plot: point colour mapped to cyl (as a factor),
# point size mapped to qsec
ggplot(
  mtcars,
  aes(wt, mpg, colour = as.factor(cyl), size = qsec)
) +
  geom_point() +
  theme_classic()
# 4 Plotting with the plotly package
# 4.1 Interactive scatter plot of wt against hp
# The trace type and mode are given explicitly so plotly does not have to
# guess them.
p <- plot_ly(data = mtcars, x = ~hp, y = ~wt,
             type = "scatter", mode = "markers")
p
# 4.2 Several traces on one plot
# Three random series of 100 normal draws centred on 10, 0 and -10
data1 <- rnorm(100, mean = 10)
data2 <- rnorm(100, mean = 0)
data3 <- rnorm(100, mean = -10)
x <- seq_len(100)
data <- data.frame(x, data1, data2, data3)
# Each series is added as its own scatter trace with a different drawing mode
p <- plot_ly(data, x = ~x) %>%
  add_trace(y = ~data1, name = "data1", type = "scatter", mode = "lines") %>%
  add_trace(y = ~data2, name = "data2", type = "scatter",
            mode = "lines+markers") %>%
  add_trace(y = ~data3, name = "data3", type = "scatter", mode = "markers")
# Display the figure; assigning it to p alone does not render it
p
评论区(0)