Final Output In Fancy Format
Install and load all of the required libraries listed below:
library(dplyr)
library(plotly)
library(purrr)
library(cluster)
library(NbClust)
library(factoextra)
library(IRdisplay)
library(plyr)
library(tidyverse)
library(ggpubr)
library(GGally)
library(factoextra)
library(RColorBrewer)
library(ggplotify)
library(hrbrthemes)
library(dendextend)
library(plyr)
Read Data
# Load the mall customer records from the CSV file in the working directory.
data <- read.csv(file = "Mall_Customers.csv")
# Echo the data frame so the loaded rows can be inspected.
data
Result
...
...
Check the data type of every column
# Show the structure of the data frame: column names, types and first values.
str(object = data)
Output:
## 'data.frame': 200 obs. of 5 variables:
## $ CustomerID : int 1 2 3 4 5 6 7 8 9 10 ...
## $ Gender : chr "Male" "Male" "Female" "Female" ...
## $ Age : int 19 21 20 23 31 22 35 23 64 30 ...
## $ Annual.Income..k.. : int 15 15 16 16 17 17 18 18 19 19 ...
## $ Spending.Score..1.100.: int 39 81 6 77 40 76 6 94 3 72 ...
# Shared ggplot2 theme for the charts below: bold size-14 plot titles and
# size-16 axis tick labels and axis titles.
My_Theme <- theme(
  plot.title = element_text(face = "bold", size = 14),
  axis.text = element_text(size = 16),
  axis.title = element_text(size = 16)
)
# Count how many customers fall into each gender category.
table(data[["Gender"]])
Output:
##
## Female Male
## 112 88
Bar Plot For Male And Female Count
# Bar chart of the number of customers per gender.
# geom_bar() counts the rows for each x value by default, so neither an
# explicit stat nor a y mapping is needed. The previous
# geom_histogram(stat = "count") form triggers an "ignoring unknown
# parameters" warning in recent ggplot2 releases and relied on the
# deprecated ..count.. notation.
ggplot(data, aes(x = Gender)) +
  geom_bar(fill = "#9C9CEE", alpha = 0.8) +
  scale_y_continuous(breaks = seq(0, 120, 20)) +
  labs(x = "Gender", y = "Count") +
  My_Theme
Output:
Histogram For Distribution of Age
# Age distribution: histogram with a kernel density curve drawn against a
# second y axis placed on the right-hand side.
density1 <- density(data$Age)
p_age <- plot_ly(data, x = ~Age) %>%
  add_histogram(name = "Histogram", color = I("mediumpurple")) %>%
  add_lines(
    x = density1$x,
    y = density1$y,
    name = "Density",
    yaxis = "y2",
    fill = "tozeroy",
    color = I("lavender")
  ) %>%
  layout(
    title = "Distribution of Age ",
    xaxis = list(title = "Age"),
    yaxis2 = list(overlaying = "y", side = "right"),
    showlegend = FALSE
  )
# Display the finished figure.
p_age
Output:
Histogram For Distribution of Income
# Annual income distribution: histogram with a kernel density curve drawn
# against a second y axis placed on the right-hand side.
density2 <- density(data$Annual.Income..k..)
p_income <- plot_ly(data, x = ~Annual.Income..k..) %>%
  add_histogram(name = "Histogram", color = I("mediumpurple")) %>%
  add_lines(
    x = density2$x,
    y = density2$y,
    name = "Density",
    yaxis = "y2",
    fill = "tozeroy",
    color = I("lavender")
  ) %>%
  layout(
    title = "Distribution of Income ",
    xaxis = list(title = "Annual Income (k$)"),
    yaxis2 = list(overlaying = "y", side = "right"),
    showlegend = FALSE
  )
# Display the finished figure.
p_income
Output:
Histogram For Distribution of Spending Score
# Spending score distribution: histogram with a kernel density curve drawn
# against a second y axis placed on the right-hand side.
density3 <- density(data$Spending.Score..1.100.)
p_score <- plot_ly(data, x = ~Spending.Score..1.100.) %>%
  add_histogram(name = "Histogram", color = I("mediumpurple")) %>%
  add_lines(
    x = density3$x,
    y = density3$y,
    name = "Density",
    yaxis = "y2",
    fill = "tozeroy",
    color = I("lavender")
  ) %>%
  layout(
    title = "Distribution of Spending score ",
    xaxis = list(title = "Spending Score"),
    yaxis2 = list(overlaying = "y", side = "right"),
    showlegend = FALSE
  )
# Display the finished figure.
p_score
Output:
Multiplot
#' Histogram, boxplot and density plot of one variable split by a group
#'
#' Builds three ggplot2 charts of `x_` filled by `group_` and arranges them
#' with `ggarrange()`: the histogram on the left (labelled "A") and the
#' boxplot and density plot stacked on the right (labelled "B" and "C").
#' Titles and axis labels are taken from the expressions the caller wrote for
#' `x_` and `group_`, so `multi_plot(df, Age, Gender)` is labelled
#' "Age - Gender" while `multi_plot(df, df$Age, df$Gender)` is labelled
#' "df$Age - df$Gender".
#'
#' @param data_ Data frame handed to `ggplot()` as the plot data.
#' @param x_ Values mapped to the x axis (for example `data$Age`).
#' @param group_ Grouping values mapped to the fill colour and to the y axis
#'   of the boxplot (for example `data$Gender`).
#' @param binwidth_ Bin width of the histogram. Defaults to 8, the value that
#'   was previously hard-coded.
#' @return The combined figure returned by `ggarrange()`.
multi_plot <- function(data_, x_, group_, binwidth_ = 8) {
  # Capture the caller's expressions once; they are reused for every label.
  x_label <- deparse(substitute(x_))
  group_label <- deparse(substitute(group_))

  # Histogram --------------------
  hist_plot <- ggplot(data = data_, aes(x = x_, fill = group_)) +
    geom_histogram(
      alpha = 0.5,
      show.legend = FALSE,
      binwidth = binwidth_
    ) +
    labs(
      title = paste(x_label, group_label, sep = " - "),
      subtitle = "Histogram",
      x = x_label
    )

  # Boxplot ----------------------
  box_plot <- ggplot(data = data_, aes(x = x_, y = group_, fill = group_)) +
    geom_boxplot(alpha = 0.5, show.legend = FALSE) +
    labs(
      title = "",
      subtitle = "Boxplot",
      x = x_label,
      y = ""
    )

  # Density Plot --------------------
  den_plot <- ggplot(data = data_, aes(x = x_, fill = group_)) +
    geom_density(alpha = 0.5, show.legend = FALSE) +
    labs(subtitle = "Density Plot", x = x_label)

  # Right-hand column: boxplot above the density plot.
  right_column <- ggarrange(
    box_plot,
    den_plot,
    nrow = 2,
    labels = c("B", "C")
  )

  ggarrange(hist_plot, right_column, ncol = 2, labels = "A")
}
# Age summary for each gender: minimum, rounded mean, median and maximum.
ddply(
  .data = data,
  .variables = "Gender",
  .fun = summarise,
  min = min(Age),
  mean = round(mean(Age)),
  median = median(Age),
  max = max(Age)
)
Output:
## Gender min mean median max
## 1 Female 18 38 35 68
## 2 Male 18 40 37 70
# Pull the two columns into plain vectors and pass those to multi_plot().
# multi_plot() builds its titles and axis labels from the argument
# expressions, so passing `Age` and `Gender` gives the labels "Age" and
# "Gender" instead of "data$Age" and "data$Gender". Previously these two
# vectors were assigned but never used.
Gender <- data$Gender
Age <- data$Age
multi_plot(data, Age, Gender)
Output:
Scatter Plot
# Pairwise scatter plots of the numeric variables, coloured by gender.

# Annual income vs. spending score; the legend is hidden and this plot sets
# the title and subtitle.
scatter_ans <- ggplot(
  data,
  aes(
    x = Annual.Income..k..,
    y = Spending.Score..1.100.,
    colour = Gender
  )
) +
  geom_point(size = 2, alpha = 0.6, show.legend = FALSE) +
  labs(title = "Scatterplots", subtitle = "Age - Income - Score")

# Age vs. annual income.
scatter_aa <- ggplot(
  data,
  aes(x = Age, y = Annual.Income..k.., colour = Gender)
) +
  geom_point(size = 2, alpha = 0.6)

# Age vs. spending score.
scatter_ags <- ggplot(
  data,
  aes(x = Age, y = Spending.Score..1.100., colour = Gender)
) +
  geom_point(size = 2, alpha = 0.6)

# Only the age vs. income plot is displayed here.
scatter_aa
Output:
# Fix the random seed so the k-means result is reproducible.
set.seed(50)
# Fit a k-means model with k = 6 on age, annual income and spending score
# (columns 3 to 5 of the data frame).
model_customers <- kmeans(
  x = data[, c("Age", "Annual.Income..k..", "Spending.Score..1.100.")],
  centers = 6
)
# Print the fitted model: cluster sizes, centres and assignments.
model_customers
Output:
## K-means clustering with 6 clusters of sizes 39, 21, 22, 45, 35, 38
##
## Cluster means:
## Age Annual.Income..k.. Spending.Score..1.100.
## 1 32.69231 86.53846 82.12821
## 2 44.14286 25.14286 19.52381
## 3 25.27273 25.72727 79.36364
## 4 56.15556 53.37778 49.08889
## 5 41.68571 88.22857 17.28571
## 6 27.00000 56.65789 49.13158
##
## Clustering vector:
## [1] 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2
## [38] 3 2 3 4 3 4 6 2 3 4 6 6 6 4 6 6 4 4 4 4 4 6 4 4 6 4 4 4 6 4 4 6 6 4 4 4 4
## [75] 4 6 4 6 6 4 4 6 4 4 6 4 4 6 6 4 4 6 4 6 6 6 4 6 4 6 6 4 4 6 4 6 4 4 4 4 4
## [112] 6 6 6 6 6 4 4 4 4 6 6 6 1 6 1 5 1 5 1 5 1 6 1 5 1 5 1 5 1 5 1 6 1 5 1 5 1
## [149] 5 1 5 1 5 1 5 1 5 1 5 1 5 1 5 1 5 1 5 1 5 1 5 1 5 1 5 1 5 1 5 1 5 1 5 1 5
## [186] 1 5 1 5 1 5 1 5 1 5 1 5 1 5 1
##
## Within cluster sum of squares by cluster:
## [1] 13972.359 7732.381 4099.818 8062.133 16690.857 7742.895
## (between_SS / total_SS = 81.1 %)
##
## Available components:
##
## [1] "cluster" "centers" "totss" "withinss" "tot.withinss"
## [6] "betweenss" "size" "iter" "ifault"
# Plot income, spending score and age on three axes, colouring each customer
# by the k-means cluster it was assigned to.
clusters <- model_customers$cluster
p_clusters <- plot_ly(
  data,
  x = ~Annual.Income..k..,
  y = ~Spending.Score..1.100.,
  z = ~Age
) %>%
  add_markers(color = factor(clusters)) %>%
  layout(
    scene = list(
      xaxis = list(title = "Annual Income (k$)"),
      yaxis = list(title = "Spending Score"),
      zaxis = list(title = "Age")
    )
  )
# Display the finished figure.
p_clusters
Output:
Hire expert to do your R programming Assignment, Project, Coursework, Homework.
Contact Us! at:
realcode4you@gmail.com
Comments