top of page

R Programming Assignment Help | Exploratory Data Analysis (EDA) With R Programming

realcode4you

Final Output In Fancy Format



Load all the required libraries listed below (install any that are missing first):

library(dplyr)
library(plotly)
library(purrr) 
library(cluster)
library(NbClust)
library(factoextra)
library(IRdisplay)
library(plyr)
library(tidyverse)
library(ggpubr)
library(GGally)
library(factoextra)
library(RColorBrewer)
library(ggplotify)
library(hrbrthemes)
library(dendextend)
library(plyr)

Read Data

# Read the customer records from Mall_Customers.csv (path is relative to the
# working directory) into a data frame
data <- read.csv("Mall_Customers.csv")
# Auto-print the data frame to inspect it
data

Result


















...

...


Check the data type of every column

# Print the structure of the data frame: each column's name, type and first values
str(data)

Output:

## 'data.frame':    200 obs. of  5 variables:
##  $ CustomerID            : int  1 2 3 4 5 6 7 8 9 10 ...
##  $ Gender                : chr  "Male" "Male" "Female" "Female" ...
##  $ Age                   : int  19 21 20 23 31 22 35 23 64 30 ...
##  $ Annual.Income..k..    : int  15 15 16 16 17 17 18 18 19 19 ...
##  $ Spending.Score..1.100.: int  39 81 6 77 40 76 6 94 3 72 ...

# Shared theme for the ggplot2 graphs: bold 14 pt plot title,
# 16 pt axis tick labels and 16 pt axis titles
My_Theme <- theme(
  plot.title = element_text(size = 14, face = "bold"),
  axis.text  = element_text(size = 16),
  axis.title = element_text(size = 16)
)

# Number of customers in each gender category
table(data[["Gender"]])

Output:

## 
## Female   Male 
##    112     88


Bar Plot For Male And Female Count

# Bar chart of the number of customers per gender.
# geom_bar() counts the rows in each category by default, so neither a stat
# override nor an explicit y mapping is needed (the `..count..` notation is
# deprecated in recent ggplot2 releases).
ggplot(data, aes(x = Gender)) +
  geom_bar(fill = "#9C9CEE", alpha = 0.8) +
  scale_y_continuous(breaks = seq(0, 120, 20)) +
  labs(x = "Gender", y = "Count") +
  My_Theme

Output:













Histogram For Distribution of Age

# Kernel density estimate of customer age, overlaid on the histogram below
density1 <- density(data$Age)

# Age histogram on the primary y axis, with the density curve drawn on a
# second y axis ("y2") that overlays the first and sits on the right
p_age <- plot_ly(data, x = ~Age) %>%
  add_histogram(color = I("mediumpurple"), name = "Histogram") %>%
  add_lines(
    x = density1$x,
    y = density1$y,
    fill = "tozeroy",
    color = I("lavender"),
    yaxis = "y2",
    name = "Density"
  ) %>%
  layout(
    title = "Distribution of Age ",
    xaxis = list(title = "Age"),
    yaxis2 = list(overlaying = "y", side = "right"),
    showlegend = FALSE
  )
p_age

Output:













Histogram For Distribution of Income

# Kernel density estimate of annual income, overlaid on the histogram below
density2 <- density(data$Annual.Income..k..)

# Income histogram on the primary y axis, with the density curve drawn on a
# second y axis ("y2") that overlays the first and sits on the right
p_income <- plot_ly(data, x = ~Annual.Income..k..) %>%
  add_histogram(color = I("mediumpurple"), name = "Histogram") %>%
  add_lines(
    x = density2$x,
    y = density2$y,
    fill = "tozeroy",
    color = I("lavender"),
    yaxis = "y2",
    name = "Density"
  ) %>%
  layout(
    title = "Distribution of Income ",
    xaxis = list(title = "Annual Income (k$)"),
    yaxis2 = list(overlaying = "y", side = "right"),
    showlegend = FALSE
  )
p_income

Output:













Histogram For Distribution of Spending Score

# Kernel density estimate of the spending score, overlaid on the histogram below
density3 <- density(data$Spending.Score..1.100.)

# Spending-score histogram on the primary y axis, with the density curve drawn
# on a second y axis ("y2") that overlays the first and sits on the right
p_score <- plot_ly(data, x = ~Spending.Score..1.100.) %>%
  add_histogram(color = I("mediumpurple"), name = "Histogram") %>%
  add_lines(
    x = density3$x,
    y = density3$y,
    fill = "tozeroy",
    color = I("lavender"),
    yaxis = "y2",
    name = "Density"
  ) %>%
  layout(
    title = "Distribution of Spending score ",
    xaxis = list(title = "Spending Score"),
    yaxis2 = list(overlaying = "y", side = "right"),
    showlegend = FALSE
  )
p_score

Output:













Multiplot

#' Histogram, boxplot and density plot of one variable split by a group
#'
#' Builds three ggplot2 panels from the same variable and arranges them with
#' `ggarrange()`: the histogram on the left (labelled "A") and, on the right,
#' the boxplot ("B") stacked above the density plot ("C").
#'
#' @param data_ Data frame passed to `ggplot()` as the plot data.
#' @param x_ Values mapped to the x axis of every panel, e.g. `data$Age`.
#'   The expression exactly as the caller wrote it is used for the title and
#'   the x-axis labels.
#' @param group_ Grouping values mapped to the fill colour (and to the y axis
#'   of the boxplot). The expression as written is used in the title.
#' @param binwidth Bin width of the histogram. Defaults to 8, the value that
#'   was previously hard-coded, so existing calls are unaffected.
#' @return The combined figure returned by `ggarrange()`.
multi_plot <- function(data_, x_, group_, binwidth = 8) {
  # Capture the caller's expressions once; they label all three panels
  x_label <- deparse(substitute(x_))
  group_label <- deparse(substitute(group_))

  # Histogram --------------------
  hist_plot <- ggplot(data = data_, aes(x = x_, fill = group_)) +
    geom_histogram(
      alpha = 0.5,
      show.legend = FALSE,
      binwidth = binwidth
    ) +
    labs(
      title = paste(x_label, group_label, sep = " - "),
      subtitle = "Histogram",
      x = x_label
    )

  # Boxplot ----------------------
  box_plot <- ggplot(data = data_, aes(x = x_, y = group_, fill = group_)) +
    geom_boxplot(alpha = 0.5, show.legend = FALSE) +
    labs(
      title = "",
      subtitle = "Boxplot",
      x = x_label,
      y = ""
    )

  # Density Plot -----------------
  den_plot <- ggplot(data = data_, aes(x = x_, fill = group_)) +
    geom_density(alpha = 0.5, show.legend = FALSE) +
    labs(
      subtitle = "Density Plot",
      x = x_label
    )

  # Histogram in the left column; boxplot over density plot in the right column
  ggarrange(
    hist_plot,
    ggarrange(box_plot, den_plot, nrow = 2, labels = c("B", "C")),
    ncol = 2,
    labels = "A"
  )
}

# Age summary per gender: minimum, rounded mean, median and maximum
ddply(
  .data = data,
  .variables = "Gender",
  .fun = plyr::summarise,
  min = min(Age),
  mean = round(mean(Age)),
  median = median(Age),
  max = max(Age)
)

Output:

##   Gender min mean median max
## 1 Female  18   38     35  68
## 2   Male  18   40     37  70

# Bind the columns to short names and pass those names to multi_plot():
# the function labels its panels with the argument expressions as written,
# so this yields "Age" / "Gender" instead of "data$Age" / "data$Gender"
Gender <- data$Gender
Age <- data$Age
multi_plot(data, Age, Gender)

Output:



Scatter Plot

# Annual income vs spending score, coloured by gender; carries the shared
# title/subtitle and hides its legend
scatter_ans <- ggplot(
  data,
  aes(
    x = Annual.Income..k..,
    y = Spending.Score..1.100.,
    colour = Gender
  )
) +
  geom_point(size = 2, alpha = 0.6, show.legend = FALSE) +
  labs(title = "Scatterplots", subtitle = "Age - Income - Score")

# Age vs annual income, coloured by gender
scatter_aa <- ggplot(
  data,
  aes(x = Age, y = Annual.Income..k.., colour = Gender)
) +
  geom_point(size = 2, alpha = 0.6)

# Age vs spending score, coloured by gender
scatter_ags <- ggplot(
  data,
  aes(x = Age, y = Spending.Score..1.100., colour = Gender)
) +
  geom_point(size = 2, alpha = 0.6)

# Only the age/income plot is displayed here; scatter_ans and scatter_ags are
# built above but not printed in this snippet
scatter_aa

Output:













# Fix the random seed so the k-means result is reproducible
set.seed(50)
# Build a k-means model with k = 6 on age, annual income and spending score.
# The feature columns are selected by name instead of by position (3:5) so the
# model keeps using the same variables even if the column order changes.
model_customers <- kmeans(
  data[, c("Age", "Annual.Income..k..", "Spending.Score..1.100.")],
  centers = 6
)
model_customers

Output:

## K-means clustering with 6 clusters of sizes 39, 21, 22, 45, 35, 38
## 
## Cluster means:
##        Age Annual.Income..k.. Spending.Score..1.100.
## 1 32.69231           86.53846               82.12821
## 2 44.14286           25.14286               19.52381
## 3 25.27273           25.72727               79.36364
## 4 56.15556           53.37778               49.08889
## 5 41.68571           88.22857               17.28571
## 6 27.00000           56.65789               49.13158
## 
## Clustering vector:
##   [1] 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2
##  [38] 3 2 3 4 3 4 6 2 3 4 6 6 6 4 6 6 4 4 4 4 4 6 4 4 6 4 4 4 6 4 4 6 6 4 4 4 4
##  [75] 4 6 4 6 6 4 4 6 4 4 6 4 4 6 6 4 4 6 4 6 6 6 4 6 4 6 6 4 4 6 4 6 4 4 4 4 4
## [112] 6 6 6 6 6 4 4 4 4 6 6 6 1 6 1 5 1 5 1 5 1 6 1 5 1 5 1 5 1 5 1 6 1 5 1 5 1
## [149] 5 1 5 1 5 1 5 1 5 1 5 1 5 1 5 1 5 1 5 1 5 1 5 1 5 1 5 1 5 1 5 1 5 1 5 1 5
## [186] 1 5 1 5 1 5 1 5 1 5 1 5 1 5 1
## 
## Within cluster sum of squares by cluster:
## [1] 13972.359  7732.381  4099.818  8062.133 16690.857  7742.895
##  (between_SS / total_SS =  81.1 %)
## 
## Available components:
## 
## [1] "cluster"      "centers"      "totss"        "withinss"     "tot.withinss"
## [6] "betweenss"    "size"         "iter"         "ifault"

# Cluster label assigned to each customer by the k-means model
clusters <- model_customers$cluster

# 3D scatter of annual income, spending score and age, with the markers
# coloured by cluster
p_clusters <- plot_ly(
  data,
  x = ~Annual.Income..k..,
  y = ~Spending.Score..1.100.,
  z = ~Age
) %>%
  add_markers(color = factor(clusters)) %>%
  layout(
    scene = list(
      xaxis = list(title = "Annual Income (k$)"),
      yaxis = list(title = "Spending Score"),
      zaxis = list(title = "Age")
    )
  )
p_clusters

Output:














Hire an expert to do your R programming assignment, project, coursework, or homework.


Contact us at:

realcode4you@gmail.com

Recent Posts

See All

Comments


REALCODE4YOU

Realcode4you is one of the best websites for computer science and mathematics help. We offer Python project help, Java project help, machine learning project help, and help with other programming languages and frameworks, e.g. C, C++, Data Structures, PHP, ReactJs, NodeJs, and React Native. We also provide help with all kinds of databases.

Hire us to get instant help from a Realcode4you expert at an affordable price.

USEFUL LINKS

Discount

ADDRESS

Noida, Sector 63, India 201301

Follow Us!

  • Facebook
  • Twitter
  • Instagram
  • LinkedIn

OUR CLIENTS BELONG TO

  • india
  • australia
  • canada
  • hong-kong
  • ireland
  • jordan
  • malaysia
  • new-zealand
  • oman
  • qatar
  • saudi-arabia
  • singapore
  • south-africa
  • uae
  • uk
  • usa

© 2023 IT Services provided by Realcode4you.com

bottom of page