Using RStudio with Box

The R script is used in the practical section can be downloaded here and is also shown below

###########################
# SETUP
###########################
# Install "boxr",if necessary, and load the boxr package (and other packages
# used in this script). boxr will allow you to access and write data to the box
# folders to which you you have access.

install.packages("boxr")
install.packages("plyr")
library(boxr)
library(plyr)

####################################
# AUTHENTICATION
####################################
# Authentication is the process of syncing RStudio with your Box.com account.
# If you are not already logged into Box after running this code, you may be
# asked to log into Box. Identify Box as hard disk in the cloud.

box_auth(client_id = "627lww8un9twnoa8f9rjvldf7kb56q1m",
         client_secret = "gSKdYKLd65aQpZGrq9x4QVUNnn5C8qqm")

   # Set the working directory to the "DCEG Box_RStudio_FAIR Workshop"
   # Box folder using the folder ID
box_setwd(dir_id = )

####################################
# READ IN FILES
####################################
# box_read reads the file into local memory - see the console after running the
# code for the temporary file path. After closing your RStudio session the
# file/data is deleted from the temporary local memory.

      # NOTE: The file ID is needed to read in the workshop_bc_data.csv file

bc_data = box_read(file_id = )

####################################
# RUN ANALYSES
####################################
# We can now run standard R code. Here we are generating descriptive statistics,
# creating a new variable, and running a logistic model.

   # Generate descriptive statistics
apply(bc_data[c('age', 'BMI')], 2, summary)

apply((bc_data[c('famhist', 'menarche_cat', 'parity', 'agemeno_cat',
                 'ever_smoke')]), 2, table)

   # Create new variable: BMICAT, a 4-level categorical variable
bc_data$BMIcat <- cut(bc_data$BMI,c(0, 18.5, 25, 30, 35, 100),
                      labels=c("under weight", "normal","over weight","obese1",
                               "obese2"),right=FALSE)

   # Check BMI categories
ddply(bc_data, "BMIcat", summarise,
      N    = length(BMI),
      Min  = min(BMI),
      Mean = mean(BMI),
      Max  = max(BMI))

   # Run logistic model
logit_parity <- glm(status ~ parity + age, data=bc_data, family = "binomial")
summary(logit_parity)

####################################
# WRITE UPDATED FILE TO BOX
####################################
# We can use the box_write function to write the updated data set with the
# newly created BMIcat variable back to your personal Box folder.

   # Set the working directory to the your personal Box folder that you created
   # by setting 'dir_id' to your folder ID
box_setwd(dir_id = 161291860699)

   # Save the file using the 'box_write' function
box_write(object = bc_data, file_name = "bc_data_TUA.csv",
             description = "TUA updated workshop dataset, July 14: added BMI_cat variable")

# PASTE FILE DOCUMENTATION OUTPUT BELOW:


####################################
# READ IN UPDATED FILE
####################################
# The updated data that was saved in the last step can be loaded using the
# file id for the new file (found in console).

   # Set the working directory to the your personal Box folder that you created
   # by setting 'dir_id' to your folder ID
box_setwd(dir_id = )

   # Read in the updated data file from your personal folder using the file ID

   # NOTE: When reading in a file without specifying a 'version_id' or
   # 'version_no', you will always get the most recent file version

bc_data_new = box_read(file_id = )

####################################
# MANIPULATE UPDATED FILE
####################################
# The data can then be manipulated or analyzed as needed

   # Subset the data to include only those 50 years of age or older
bc_data_new = subset(bc_data_new, age>=50)

####################################
# SAVE UPDATED FILE
####################################
# Because we are using the same file name, Box will update the data set using
# version control (rather than creating a new file)

box_write(object = bc_data_new, file_name = "bc_data_TUA.csv",
          description = "ANH Updated workshop dataset, July 14: ages>=50")

# PASTE FILE DOCUMENTATION OUTPUT BELOW


####################################
# READ IN PREVIOUS FILE VERSION
####################################
# Previous versions of the data file can be loaded from Box using either the
# version ID or version number of the version of interest.

      # Option 1: Load file using version ID
bc_data_org = box_read(file_id = , version_id = )

      # Option 2: Load file using version number
bc_data_org = box_read(file_id = , version_no = 1)