-
Notifications
You must be signed in to change notification settings - Fork 10
Expand file tree
/
Copy pathsubmitscript1.R
More file actions
81 lines (44 loc) · 1.94 KB
/
submitscript1.R
File metadata and controls
81 lines (44 loc) · 1.94 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
# Assignment 2
getwd()

# Work from the author's project checkout when it exists; on any other
# machine keep the current working directory instead of aborting the script.
project_dir <- "C:/Users/aurquhart/Documents/Github/R_Programming_Assignment2"
if (dir.exists(project_dir)) {
  setwd(project_dir)
}

# Point at the zip and download it only when it is not already present.
# mode = "wb" writes the archive as binary so it is not corrupted on Windows.
dataset_url <- "https://d396qusza40orc.cloudfront.net/rprog%2Fdata%2Fspecdata.zip"
if (!file.exists("specdata.zip")) {
  download.file(dataset_url, "specdata.zip", mode = "wb")
}

# Unzip the archive into the "specdata" directory
unzip("specdata.zip", exdir = "specdata")

# Check the extracted files (listed from the nested specdata/specdata folder)
list.files(file.path("specdata", "specdata"))
# So we're going to write a function to calculate the mean of
# a field in the files - sulfate or nitrate

# Step 1 - test consolidating the files into a single data frame
directory <- "specdata//specdata"
files <- list.files(directory)
files_list <- list.files(directory, full.names = TRUE) # full paths to files
number_of_files <- length(files)

# Stop early with a clear message rather than failing inside read.csv()
if (number_of_files == 0L) {
  stop("No files found in '", directory, "'", call. = FALSE)
}

# Read every file once and bind them in a single step; growing a data frame
# with rbind() inside a loop re-copies all previous rows on each iteration.
dat <- do.call(rbind, lapply(files_list, read.csv))

# Now test that the mean of the 2 fields can be calculated
mean(dat[, "sulfate"], na.rm = TRUE) # mean sulfate, ignoring NA values
mean(dat[, "nitrate"], na.rm = TRUE) # mean nitrate, ignoring NA values
# head(dat)
#Now put it all into a function
#' Mean of a pollutant across a set of monitor files
#'
#' Lists the files in `directory`, picks the ones at the positions given by
#' `id` in that (sorted) listing, reads each as CSV and returns the mean of
#' the requested pollutant column over all rows read, ignoring `NA` values.
#'
#' @param directory Character vector of length 1 indicating the location of
#'   the CSV files.
#' @param pollutant Character vector of length 1 indicating the name of the
#'   pollutant for which the mean is calculated; either "sulfate" or
#'   "nitrate".
#' @param id Integer vector indicating the monitor ID numbers to be used.
#'   Each ID selects the file at that position of `list.files(directory)`.
#' @return The mean of the pollutant across all monitors listed in `id`
#'   (ignoring `NA` values); `NaN` when `id` is empty or no non-missing
#'   values exist.
pollutantmean <- function(directory, pollutant, id = 1:332) {
  stopifnot(
    is.character(directory), length(directory) == 1L,
    is.character(pollutant), length(pollutant) == 1L
  )

  # No monitors requested: the mean of zero observations is NaN
  if (length(id) == 0L) {
    return(NaN)
  }

  files_list <- list.files(directory, full.names = TRUE)

  # An ID beyond the listing would index to NA and make read.csv() fail with
  # an unhelpful message, so reject it up front.
  out_of_range <- is.na(id) | id < 1 | id > length(files_list)
  if (any(out_of_range)) {
    stop(
      sprintf(
        "'id' must be between 1 and %d (the number of files in '%s')",
        length(files_list), directory
      ),
      call. = FALSE
    )
  }

  # Keep only the requested column from each file instead of rbind()-ing
  # whole data frames inside a loop, which re-copies earlier rows every time.
  values <- unlist(
    lapply(files_list[id], function(path) {
      monitor <- read.csv(path)
      if (!pollutant %in% names(monitor)) {
        stop(
          sprintf("column '%s' not found in '%s'", pollutant, path),
          call. = FALSE
        )
      }
      monitor[[pollutant]]
    }),
    use.names = FALSE
  )

  mean(values, na.rm = TRUE)
}
# Example: mean sulfate across monitors 1 to 5. The CSV files sit in the
# nested specdata/specdata folder (the path listed earlier in this script);
# the top-level "specdata" directory only contains that sub-folder.
pollutantmean(file.path("specdata", "specdata"), "sulfate", id = 1:5)