Description

The Auto data set contains information about cars, such as horsepower and miles per gallon.

This dataset was taken from the StatLib library which is maintained at Carnegie Mellon University. The dataset was used in the 1983 American Statistical Association Exposition.

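Download: cars_auto.csv (note: the import step below reads "auto_cars.csv"; rename the downloaded file or adjust the path so the two match)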

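The data has 392 observations on the following 9 variables. (The CSV file read below additionally contains a row-index column, X, so the imported data frame has 10 columns.)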

  • mpg - miles per gallon
  • cylinders - Number of cylinders between 3 and 8
  • displacement - Engine displacement (cu. inches)
  • horsepower - Engine horsepower
  • weight - Vehicle weight (lbs.)
  • acceleration - Time to accelerate from 0 to 60 mph (sec.)
  • year - Model year (modulo 100)
  • origin - Origin of car (1. American, 2. European, 3. Japanese)
  • name - Vehicle name

1. Import Data

# Read the Auto data set. stringsAsFactors = TRUE keeps the text columns
# (`origin`, `name`) as factors on every R version; the default became FALSE
# in R 4.0.0, which would turn them into character vectors and change the
# str() and summary() results for those columns.
Auto <- read.csv("auto_cars.csv", stringsAsFactors = TRUE)

2. Exploring Data

# Report the dimensions of the data frame: rows first, then columns.
cat("No. of Rows:", dim(Auto)[1L])
## No. of Rows: 392
cat("\nNo. of Columns:", dim(Auto)[2L])
## 
## No. of Columns: 10
# Names of the columns
colnames(Auto)
##  [1] "X"            "mpg"          "cylinders"    "displacement"
##  [5] "horsepower"   "weight"       "acceleration" "year"        
##  [9] "origin"       "name"
# Print the first 6 rows
head(Auto, n = 6L)
# Print the last 4 rows
tail(Auto, n = 4L)
# Compact structure overview: one line per column with its type and first values
str(object = Auto)
## 'data.frame':    392 obs. of  10 variables:
##  $ X           : int  1 2 3 4 5 6 7 8 9 10 ...
##  $ mpg         : num  18 15 18 16 17 15 14 14 14 15 ...
##  $ cylinders   : int  8 8 8 8 8 8 8 8 8 8 ...
##  $ displacement: num  307 350 318 304 302 429 454 440 455 390 ...
##  $ horsepower  : int  130 165 150 150 140 198 220 215 225 190 ...
##  $ weight      : int  3504 3693 3436 3433 3449 4341 4354 4312 4425 3850 ...
##  $ acceleration: num  12 11.5 11 12 10.5 10 9 8.5 10 8.5 ...
##  $ year        : int  70 70 70 70 70 70 70 70 70 70 ...
##  $ origin      : Factor w/ 3 levels "American","European",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ name        : Factor w/ 301 levels "amc ambassador brougham",..: 49 36 230 14 160 141 54 222 240 2 ...

Statistics of the data

# Per-column summary: quartiles and mean for numeric columns, level counts
# for factor columns.
summary(object = Auto)
##        X               mpg          cylinders      displacement  
##  Min.   :  1.00   Min.   : 9.00   Min.   :3.000   Min.   : 68.0  
##  1st Qu.: 99.75   1st Qu.:17.00   1st Qu.:4.000   1st Qu.:105.0  
##  Median :198.50   Median :22.75   Median :4.000   Median :151.0  
##  Mean   :198.52   Mean   :23.45   Mean   :5.472   Mean   :194.4  
##  3rd Qu.:296.25   3rd Qu.:29.00   3rd Qu.:8.000   3rd Qu.:275.8  
##  Max.   :397.00   Max.   :46.60   Max.   :8.000   Max.   :455.0  
##                                                                  
##    horsepower        weight      acceleration        year      
##  Min.   : 46.0   Min.   :1613   Min.   : 8.00   Min.   :70.00  
##  1st Qu.: 75.0   1st Qu.:2225   1st Qu.:13.78   1st Qu.:73.00  
##  Median : 93.5   Median :2804   Median :15.50   Median :76.00  
##  Mean   :104.5   Mean   :2978   Mean   :15.54   Mean   :75.98  
##  3rd Qu.:126.0   3rd Qu.:3615   3rd Qu.:17.02   3rd Qu.:79.00  
##  Max.   :230.0   Max.   :5140   Max.   :24.80   Max.   :82.00  
##                                                                
##       origin                    name    
##  American:245   amc matador       :  5  
##  European: 68   ford pinto        :  5  
##  Japanese: 79   toyota corolla    :  5  
##                 amc gremlin       :  4  
##                 amc hornet        :  4  
##                 chevrolet chevette:  4  
##                 (Other)           :365
# Select one column as a vector and show its first six values
head(Auto[["mpg"]])
## [1] 18 15 18 16 17 15
# Select a single cell by position: row 1, column 2 (mpg)
Auto[1L, 2L]
## [1] 18

Correlation of data

# Correlation matrix of the numeric car measurements. Columns are picked by
# name so the CSV's row counter `X` (1, 2, 3, ...) is left out: it is not a
# property of the cars and only adds spurious correlations to the matrix.
cor(Auto[, c(
  "mpg", "cylinders", "displacement", "horsepower",
  "weight", "acceleration", "year"
)])
##                     mpg  cylinders displacement horsepower     weight
## mpg           1.0000000 -0.7776175   -0.8051269 -0.7784268 -0.8322442
## cylinders    -0.7776175  1.0000000    0.9508233  0.8429834  0.8975273
## displacement -0.8051269  0.9508233    1.0000000  0.8972570  0.9329944
## horsepower   -0.7784268  0.8429834    0.8972570  1.0000000  0.8645377
## weight       -0.8322442  0.8975273    0.9329944  0.8645377  1.0000000
## acceleration  0.4233285 -0.5046834   -0.5438005 -0.6891955 -0.4168392
## year          0.5805410 -0.3456474   -0.3698552 -0.4163615 -0.3091199
##              acceleration       year
## mpg             0.4233285  0.5805410
## cylinders      -0.5046834 -0.3456474
## displacement   -0.5438005 -0.3698552
## horsepower     -0.6891955 -0.4163615
## weight         -0.4168392 -0.3091199
## acceleration    1.0000000  0.2903161
## year            0.2903161  1.0000000

3. Data Visualization

Are there any missing values?

# Count the missing values in each column.
# Alternatives kept for reference:
#   is.na(Auto)         - full logical matrix of missing cells
#   table(is.na(Auto))  - overall tally of missing vs. present cells
vapply(Auto, function(column) sum(is.na(column)), numeric(1))
##            X          mpg    cylinders displacement   horsepower 
##            0            0            0            0            0 
##       weight acceleration         year       origin         name 
##            0            0            0            0            0
# fix() opens an interactive data editor and assigns the edited copy back to
# Auto. Launch it only in an interactive session so that non-interactive runs
# (Rscript, knitting the report) neither stall nor fail at this point.
if (interactive()) {
  fix(Auto)
}

i. Histogram

Shows the distribution of a variable in the data

# NOTE(review): attach() puts Auto's columns on the search path so that bare
# column names resolve. Prefer Auto$col or with(Auto, ...) and drop this call
# once nothing relies on it.
attach(Auto)
# Histogram of fuel economy (miles per gallon)
hist(
  Auto$mpg,
  main = "Histogram of the MPG",
  xlab = "MPG",
  ylab = "Count",
  col = "green"
)

ii. Density Plot

# Kernel density estimate of vehicle weight, drawn as a red curve
plot(
  density(Auto$weight),
  col = "red",
  main = "Density Plot of Weight"
)

# Count how many cars have each number of cylinders. with() looks `cylinders`
# up inside Auto, so the call does not depend on attach(Auto), and the printed
# table still carries the "cylinders" header.
with(Auto, table(cylinders))
## cylinders
##   3   4   5   6   8 
##   4 199   3  83 103

iii. Barplots

# Bar chart of the number of cars per cylinder count. The column is read
# through Auto$ so the chunk does not depend on attach(Auto).
barplot(
  table(Auto$cylinders),
  main = "Count of the Type of cylinders in engine",
  xlab = "No of Cylinder",
  ylab = "count",
  col = c("red", "green", "blue", "orange", "brown")
)

iv. Scatterplot

Shows the relationship between the values of two columns

# Scatter plot of horsepower against mpg. with() resolves both columns inside
# Auto, so no attach() is needed, and the default axis labels stay "mpg" and
# "horsepower".
with(
  Auto,
  plot(mpg, horsepower, main = "MPG vs Horsepower", pch = 18, col = "red")
)

# Number of cars per region of origin
table(Auto[["origin"]])
## 
## American European Japanese 
##      245       68       79
# Bar chart of the number of cars per origin. Bar labels come from the table's
# own names instead of a hand-typed vector, so labels and counts cannot get out
# of step. The bars are solid-filled: `density` is a shading-line density in
# lines per inch, for which the previous value of 392 was not meaningful.
origin_counts <- table(Auto$origin)
barplot(origin_counts, col = terrain.colors(length(origin_counts)))

# Scatter plot of horsepower against mpg, coloured by origin. A single named
# colour vector drives both the points and the legend so the two cannot
# disagree, and the columns are read from Auto explicitly rather than through
# attach(Auto).
origin_cols <- c(American = "blue", European = "red", Japanese = "green")
# Draw an empty frame first (type = "n") so the axes span all cars.
with(Auto, plot(mpg, horsepower, main = "MPG vs Horsepower", type = "n"))
# Add one origin at a time, in the order given by origin_cols.
for (region in names(origin_cols)) {
  region_rows <- which(Auto$origin == region)
  points(
    Auto$mpg[region_rows],
    Auto$horsepower[region_rows],
    col = origin_cols[[region]]
  )
}
legend(
  "topright",
  pch = 1,
  col = unname(origin_cols),
  legend = names(origin_cols)
)

v. Boxplot

Shows summary statistics (median, quartiles and outliers) in a single plot

# Box plot of acceleration time for each cylinder count. The formula refers to
# bare column names resolved through `data = Auto`, and the axis labels are set
# explicitly so they do not depend on how the formula terms are written.
boxplot(
  acceleration ~ cylinders,
  data = Auto,
  main = "BoxPlot of acceleration",
  xlab = "cylinders",
  ylab = "acceleration"
)

vi. Piechart

# Pie chart of cars per origin, with a colour key in the top-left corner.
# The same three-colour palette is used for the slices and the key.
origin_palette <- rainbow(3)
pie(table(Auto$origin), col = origin_palette)
legend(
  "topleft",
  pch = 19,
  col = origin_palette,
  legend = c("American", "European", "Japanese")
)

vii. Linechart

# Number of cars per model year, drawn as a line with a marker on every year.
# The table is built once from Auto$year, so the chunk does not depend on
# attach(Auto) and both layers are guaranteed to show the same counts.
year_counts <- table(Auto$year)
plot(
  year_counts,
  type = "l",
  main = "Line plot of the Year",
  xlab = "Year",
  ylab = "Count"
)
points(year_counts, pch = 19, type = "p")