The Auto dataset contains information about cars, such as horsepower and miles per gallon.
This dataset was taken from the StatLib library which is maintained at Carnegie Mellon University. The dataset was used in the 1983 American Statistical Association Exposition.
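Download: cars_auto.csv (the code below reads the file as "auto_cars.csv", so save it under that name in the working directory)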
The data has 392 observations on 9 variables. The CSV file also carries an extra column X, so it loads as a data frame with 10 columns.
# Load the Auto data set from the working directory.
# stringsAsFactors = TRUE keeps `origin` and `name` as factors on R >= 4.0
# (where the default became FALSE); the str() and summary() output shown
# further down assumes factor columns.
Auto <- read.csv("auto_cars.csv", stringsAsFactors = TRUE)
# Report the dimensions of the loaded data frame.
cat("No. of Rows:", nrow(Auto))
## No. of Rows: 392
cat("\nNo. of Columns:", ncol(Auto))
##
## No. of Columns: 10
# Column names of the data frame
colnames(Auto)
## [1] "X" "mpg" "cylinders" "displacement"
## [5] "horsepower" "weight" "acceleration" "year"
## [9] "origin" "name"
# First six rows (six is also head()'s default)
head(Auto, n = 6)
# Final four rows
tail(Auto, n = 4)
# Compact overview: type and leading values of every column
str(object = Auto)
## 'data.frame': 392 obs. of 10 variables:
## $ X : int 1 2 3 4 5 6 7 8 9 10 ...
## $ mpg : num 18 15 18 16 17 15 14 14 14 15 ...
## $ cylinders : int 8 8 8 8 8 8 8 8 8 8 ...
## $ displacement: num 307 350 318 304 302 429 454 440 455 390 ...
## $ horsepower : int 130 165 150 150 140 198 220 215 225 190 ...
## $ weight : int 3504 3693 3436 3433 3449 4341 4354 4312 4425 3850 ...
## $ acceleration: num 12 11.5 11 12 10.5 10 9 8.5 10 8.5 ...
## $ year : int 70 70 70 70 70 70 70 70 70 70 ...
## $ origin : Factor w/ 3 levels "American","European",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ name : Factor w/ 301 levels "amc ambassador brougham",..: 49 36 230 14 160 141 54 222 240 2 ...
Summary statistics of the data
# Per-column summary: quartiles and mean for numeric columns, level counts
# for factor columns
summary(object = Auto)
## X mpg cylinders displacement
## Min. : 1.00 Min. : 9.00 Min. :3.000 Min. : 68.0
## 1st Qu.: 99.75 1st Qu.:17.00 1st Qu.:4.000 1st Qu.:105.0
## Median :198.50 Median :22.75 Median :4.000 Median :151.0
## Mean :198.52 Mean :23.45 Mean :5.472 Mean :194.4
## 3rd Qu.:296.25 3rd Qu.:29.00 3rd Qu.:8.000 3rd Qu.:275.8
## Max. :397.00 Max. :46.60 Max. :8.000 Max. :455.0
##
## horsepower weight acceleration year
## Min. : 46.0 Min. :1613 Min. : 8.00 Min. :70.00
## 1st Qu.: 75.0 1st Qu.:2225 1st Qu.:13.78 1st Qu.:73.00
## Median : 93.5 Median :2804 Median :15.50 Median :76.00
## Mean :104.5 Mean :2978 Mean :15.54 Mean :75.98
## 3rd Qu.:126.0 3rd Qu.:3615 3rd Qu.:17.02 3rd Qu.:79.00
## Max. :230.0 Max. :5140 Max. :24.80 Max. :82.00
##
## origin name
## American:245 amc matador : 5
## European: 68 ford pinto : 5
## Japanese: 79 toyota corolla : 5
## amc gremlin : 4
## amc hornet : 4
## chevrolet chevette: 4
## (Other) :365
# Extract a single column as a vector and show its first values
head(Auto[["mpg"]])
## [1] 18 15 18 16 17 15
# Extract a single cell by row and column position
Auto[1L, 2L]
## [1] 18
Correlation of data
# Pairwise correlations between the numeric measurements.
# Columns are selected by name rather than by position (1:8): the positional
# range also pulled in X, which looks like a row index carried over from the
# CSV, so its correlations (e.g. ~0.997 with year) say nothing about the cars.
cor(Auto[, c("mpg", "cylinders", "displacement", "horsepower",
             "weight", "acceleration", "year")])
## mpg cylinders displacement horsepower weight
## mpg 1.0000000 -0.7776175 -0.8051269 -0.7784268 -0.8322442
## cylinders -0.7776175 1.0000000 0.9508233 0.8429834 0.8975273
## displacement -0.8051269 0.9508233 1.0000000 0.8972570 0.9329944
## horsepower -0.7784268 0.8429834 0.8972570 1.0000000 0.8645377
## weight -0.8322442 0.8975273 0.9329944 0.8645377 1.0000000
## acceleration 0.4233285 -0.5046834 -0.5438005 -0.6891955 -0.4168392
## year 0.5805410 -0.3456474 -0.3698552 -0.4163615 -0.3091199
## acceleration year
## mpg 0.4233285 0.5805410
## cylinders -0.5046834 -0.3456474
## displacement -0.5438005 -0.3698552
## horsepower -0.6891955 -0.4163615
## weight -0.4168392 -0.3091199
## acceleration 1.0000000 0.2903161
## year 0.2903161 1.0000000
Are there any missing values?
# Count missing values per column (every count is zero in the output below).
# More verbose alternatives, kept for reference:
#is.na(Auto)
#table(is.na(Auto))
colSums(is.na(Auto))
## X mpg cylinders displacement horsepower
## 0 0 0 0 0
## weight acceleration year origin name
## 0 0 0 0 0
# Browse the data in a spreadsheet-style window, but only when a user is
# present. The previous bare fix(Auto) opened an *editor* whose changes were
# written back to Auto and which cannot run in a non-interactive session;
# View() is read-only, so the analysis stays reproducible.
if (interactive()) {
  View(Auto)
}
Get the distribution of the variables in the data
# Put Auto's columns on the search path so they can be referenced by bare
# name; other statements in this script may depend on this.
attach(Auto)
# Histogram of miles per gallon
hist(
  Auto$mpg,
  col = "green",
  xlab = "MPG",
  ylab = "Count",
  main = "Histogram of the MPG"
)
# Kernel density estimate of weight
plot(
  density(Auto$weight),
  main = "Density Plot of Weight",
  col = "red"
)
# Count the cars for each number of cylinders.
# The column is taken from Auto explicitly instead of relying on an attached
# bare name (which a same-named global variable would mask). Naming the
# argument keeps the "cylinders" header in the printed table. The table is
# computed once and reused for the bar plot.
cylinder_counts <- table(cylinders = Auto$cylinders)
cylinder_counts
## cylinders
## 3 4 5 6 8
## 4 199 3 83 103
barplot(
  cylinder_counts,
  main = "Count of the Type of cylinders in engine",
  xlab = "No of Cylinder",
  ylab = "count",
  col = c("red", "green", "blue", "orange", "brown")
)
Show the relationship between column values
# Scatter plot of horsepower against mpg. with() resolves the columns inside
# Auto, so the call does not depend on attach(), and the default axis labels
# stay "mpg" and "horsepower".
with(
  Auto,
  plot(mpg, horsepower, main = "MPG vs Horsepower", pch = 18, col = "red")
)
# Number of cars per origin; computed once and reused for the bar plot.
origin_counts <- table(Auto$origin)
origin_counts
##
## American European Japanese
## 245 68 79
# Bar labels come from the table's own names, so they cannot drift from the
# data. The former density = 392 (shading lines per inch, apparently the row
# count) is dropped in favor of plain solid fills.
barplot(origin_counts, col = terrain.colors(length(origin_counts)))
# MPG vs horsepower, one color per origin.
# The origin -> color mapping is defined once and drives both the points and
# the legend, so the two cannot get out of sync.
origin_colors <- c(American = "blue", European = "red", Japanese = "green")
# Empty frame first (type = "n"); with() keeps the "mpg"/"horsepower" axis
# labels without relying on attach().
with(Auto, plot(mpg, horsepower, main = "MPG vs Horsepower", type = "n"))
# Add the groups in the order listed above, so later groups overplot earlier
# ones.
for (origin_name in names(origin_colors)) {
  in_group <- which(Auto$origin == origin_name)
  points(
    Auto$mpg[in_group],
    Auto$horsepower[in_group],
    col = origin_colors[[origin_name]]
  )
}
legend(
  "topright",
  pch = 1,
  col = unname(origin_colors),
  legend = names(origin_colors)
)
Show the statistics in a single plot
# Box plots of acceleration for each number of cylinders. With the formula +
# data form, R versions that derive axis labels from the formula show
# "cylinders" / "acceleration" rather than "Auto$cylinders" /
# "Auto$acceleration".
boxplot(acceleration ~ cylinders, data = Auto, main = "BoxPlot of acceleration")
# Pie chart of cars per origin. Counts and colors are computed once, and the
# legend labels are read from the table so they always match the slices.
origin_share <- table(Auto$origin)
slice_colors <- rainbow(length(origin_share))
pie(origin_share, col = slice_colors)
legend("topleft", pch = 19, col = slice_colors, legend = names(origin_share))
# Number of cars per year, drawn as a line with a marker at each year.
# The column is taken from Auto explicitly (no dependence on attach()) and
# the table is computed once for both calls.
year_counts <- table(Auto$year)
plot(
  year_counts,
  type = "l",
  main = "Line plot of the Year",
  xlab = "Year",
  ylab = "Count"
)
points(year_counts, pch = 19, type = "p")