Code
library(dave.ocpu.client)
source(system.file('app/src/API.R',package='dave.app'))
dave.ocpu.client package. All DAVe functionality not listed below can be implemented using the module R libraries directly.
library(dave.ocpu.client)
source(system.file('app/src/API.R',package='dave.app'))
library(dplyr)
# Named collection of the DAVe connection objects. The *_connection objects are
# not defined in this file; presumably they come from the sourced API.R.
apis <- c(
  'dave.ml' = dave_ml_connection,
  'ctsgetr' = ctsgetr_connection,
  'dave.network' = dave_network_connection,
  'dave.path' = dave_path_connection,
  'dave.multivariate' = dave_multivariate_connection
)

# Call get() on every connection and report 'OK' when the response status code
# is 200, 'DOWN' otherwise; the per-endpoint rows are bound into one data frame.
lapply(seq_along(apis), function(i) {
  status <- apis[[i]]$get()$status_code == 200
  data.frame(endpoint = names(apis)[i], status = ifelse(status, 'OK', 'DOWN'))
}) %>%
  do.call('rbind', .)
endpoint status
1 dave.ml OK
2 ctsgetr OK
3 dave.network OK
4 dave.path OK
5 dave.multivariate OK
# Ask the multivariate endpoint which PCA algorithms it offers.
x <- ocpu_dvmPCA_methods(dave_multivariate_connection)
No encoding supplied: defaulting to UTF-8.
# Print the returned method names.
x$results
[1] "svd" "nipals" "rnipals" "bpca" "ppca"
[6] "svdImpute" "robustPca" "nlpca" "llsImpute" "llsImputeAll"
library(dave.multivariate.app)
data("dave_data")
data <- dave_data

# Request body for the PCA endpoint: two components, cross-validation 'q2',
# svd algorithm, centered and 'uv' scaled, with a fixed seed.
body <- list(
  data = data,
  pca.components = 2,
  pca.cv = 'q2',
  pca.algorithm = "svd",
  pca.center = TRUE,
  pca.scaling = "uv",
  seed = 123,
  return = "list"
)

# Run the PCA remotely and keep the results element as the local PCA object.
x <- ocpu_dvmPCA(dave_multivariate_connection, body = body)
.pca <- x$results
summary(.pca)$description
[1] "Principal components analysis was conducted using the svd method. The data set was centered and scaled using uv. A total of 2 components were calculated, explaining 28.2% variance in the data set. The cross-validated variance explained was 36.1%."
data("dave_data_row_meta")
# Sample metadata columns used to color and label the plots.
color <- dave_data_row_meta %>% select(class)
label <- dave_data_row_meta %>% select(label)
plot_args <- list(size = 5, labels = label, color = color, label = TRUE)
# plot_args$type<-'scree'
# Scree plot of the PCA object, rendered interactively with plotly.
p <- plot(.pca, plot_args = plot_args, plot_type = 'scree')
plotly::ggplotly(p)
Warning: `gather_()` was deprecated in tidyr 1.2.0.
ℹ Please use `gather()` instead.
ℹ The deprecated feature was likely used in the plotly package.
Please report the issue at <https://github.com/plotly/plotly.R/issues>.
# Same plot call with plot_type 'cumm'.
plot_args$type <- 'cumm'
p <- plot(.pca, plot_type = 'cumm', plot_args = plot_args) #type not changing
# p<-dave.multivariate.app:::dvmPCA_scree_plot(.pca, type='cumm')
plotly::ggplotly(p)
# Scores plot: drop the 'type' entry and reuse the sample labels as point labels.
plot_args$type <- NULL
plot_args$point.labels <- plot_args$labels
plot_args$font.size <- 3
p <- plot(.pca, plot_type = 'scores', plot_args = plot_args)
plotly::ggplotly(p)
# Diagnostic plot with the same label settings as the scores plot.
plot_args$type <- NULL
plot_args$point.labels <- plot_args$labels
plot_args$font.size <- 3
p <- plot(.pca, plot_type = 'diagnostic', plot_args = plot_args)
plotly::ggplotly(p)
data("dave_data_col_meta")
# Loadings plot: no color mapping; points are labelled with the variable names
# from the column metadata.
plot_args$color <- NULL
plot_args$point.labels <- dave_data_col_meta$name
plot_args$font.size <- 2
p <- plot(.pca, plot_type = 'loadings', plot_args = plot_args)
plotly::ggplotly(p)
data("dave_data_col_meta")
# Biplot: color by sample class, label points with the variable names.
plot_args$color <- dave_data_row_meta %>% select(class)
plot_args$point.labels <- dave_data_col_meta$name
plot_args$font.size <- 2
p <- plot(.pca, plot_type = 'biplot', plot_args = plot_args)
plotly::ggplotly(p)
library(dave.stat)
library(dave.ml.app)
data("dave_data")
# Build the modelling table: the data plus a 'class' outcome column taken from
# the row metadata.
tmp <- dave_data
y <- 'class'
.y <- dave_data_row_meta$class
tmp[y] <- .y

# Model settings reused by the later requests.
.metric <- 'Kappa'
.method <- c('rf')
classProbs <- TRUE
tuneN <- 1
tuneGrid <- NULL

# Create the model data object on the ML endpoint.
body <- list(data = tmp, y = y)
.data <- ocpu_create_model_data(dave_ml_connection, body = body)
# Resampling settings passed to the fit-control endpoint.
body <- list(method = "repeatedcv", num = 7, rep = 3, classProbs = classProbs) #this need to be flattened to a string
fitControl <- ocpu_create_fit_control(dave_ml_connection, body = body)
# create model
body <-
  list(
    data = .data,
    y = y,
    metric = .metric,
    method = .method,
    fitControl = fitControl,
    tuneN = tuneN,
    tuneGrid = tuneGrid,
    seed = 1234
  )
mod <- ocpu_multi_create_predictive_model(dave_ml_connection, body = body)
# Request the performance summary for the fitted model(s).
body <- list(multi_mod = mod)
ocpu_get_model_perf(dave_ml_connection, body = body)
$meta
$meta$session
[1] "x0ab7c097aceeee"
$meta$status
[1] 201
$paths [1] “/ocpu/tmp/x0ab7c097aceeee/R/.ocpu_get_model_perf” “/ocpu/tmp/x0ab7c097aceeee/R/.val” “/ocpu/tmp/x0ab7c097aceeee/stdout”
[4] “/ocpu/tmp/x0ab7c097aceeee/source” “/ocpu/tmp/x0ab7c097aceeee/console” “/ocpu/tmp/x0ab7c097aceeee/info”
[7] “/ocpu/tmp/x0ab7c097aceeee/files/DESCRIPTION”
$results \(results\)description [1] “Machine learning based predictive modelling was used to predict class. A single model was fit and optimized based on predictive performance on the test data.A random forest model was developed on 51 samples and 188 predictors to predict 2 classes: diabetic.., non.diabetic.. with no pre-processing. Cross validation was done by resampling: cross-validated (7 fold, repeated 3 times) . Tuning parameter ‘mtry’ was held constant at a value of 62.”
\(results\)table model train test metric maximize time_min 1 rf 0.6173153 0.7148289 Kappa TRUE 0.04736702
\(results\)best model train test metric maximize time_min 1 rf 0.6173153 0.7148289 Kappa TRUE 0.04736702 description 1 A random forest model was developed on 51 samples and 188 predictors to predict 2 classes: diabetic.., non.diabetic.. with no pre-processing. Cross validation was done by resampling: cross-validated (7 fold, repeated 3 times) . Tuning parameter ‘mtry’ was held constant at a value of 62.
library(dave.ml.app)
# Fetch the fitted model object from its OpenCPU session and plot it locally.
.mod <- get_ocpu_obj(ocpu_session(mod), dave_ml_connection$url)
x <- dave.ml.app::plot.model_list(.mod, type = 'model', print = FALSE)
ggplotly(x)
# see other types c('performance', 'model','importance','confusion','classification')
library(dave.ml.app)
# Classification plot (curve = 'pr') for the model fetched from its session.
.mod <- get_ocpu_obj(ocpu_session(mod), dave_ml_connection$url)
x <- dave.ml.app::plot.model_list(.mod, type = 'classification', curve = 'pr', print = FALSE)
ggplotly(x)
library(dave.ml.app)
# Confusion plot (counts, not percentages); printed directly rather than
# converted with ggplotly.
.mod <- get_ocpu_obj(ocpu_session(mod), dave_ml_connection$url)
x <- dave.ml.app::plot.model_list(.mod, type = 'confusion', percent = FALSE, print = FALSE)
x
# ggplotly(x) # see other types c('performance', 'model','importance','confusion','classification')
# RFE to reduce model features (variables) and increase model performance.
func <- 'rfFuncs' # randomforest
repeats <- 1 # repeated CV
number <- 5 # CV folds
# Create the RFE control object on the ML endpoint.
body <- list(func = func, repeats = repeats, number = number)
ctrl <- ocpu_create_rfe_control(dave_ml_connection, body)
# Increase 'tuneLength' to add more tuning hyperparameter tune grid combinations.
tuneLength <- 1
seed <- 123
# Specify training data
# Select the 'train.data' element of the previously created model data object.
body <- list(obj = .data,
             name = 'train.data')
train_data <- get_ocpu_list_item(dave_ml_connection, body)
#specify variable subsets for RFE
#NOTE: referencing previous ocpu session objects dynamically in ocpu calls is not easy. If we need to create a subset for RFE we need to calculate this object in another session and reference its results. To do this we also need to calculate the total number of columns in the `train_data`.
#get total number of columns/features in the data
#encode NULL is used to reference the session. The encoding = 'form' can be used for more complex requirements.
# Call base::dim on the training data by passing its session reference.
body <- list(x = ocpu_session(train_data))
pkg_url <- 'ocpu/library/base/R/dim'
dims <- ocpu_post(
  dave_ml_connection,
  body = body,
  pkg_url = pkg_url,
  encode = NULL,
  return_value = TRUE
)$results
#create subset
# Call base::seq remotely for every third size from 1 to the column count;
# return_value = FALSE keeps the result as a session to reference later.
body <- list(from = 1, to = dims[2], by = 3)
pkg_url <- 'ocpu/library/base/R/seq'
return_value <- FALSE
.subset <- ocpu_post(
  dave_ml_connection,
  body = body,
  pkg_url = pkg_url,
  encode = 'json',
  return_value = return_value
)
#carry out RFE feature selection
# Assemble the RFE request from the objects created above and run it.
body <-
  list(data = train_data,
       y = y,
       subset = .subset,
       ctrl = ctrl,
       tuneLength = tuneLength,
       seed = seed)
rfe <- ocpu_create_rfe(dave_ml_connection, body)
#pick subset
best <- 'PickSizeBest' #'pickSizeTolerance' #"PickSizeBest"#'pickSizeTolerance'
metric <- 'Kappa'
body <- list(rfRFE = rfe, best = best, metric = metric, tolerance = .1)
# best<- 'pickSizeTolerance'
# tolerance<-5
# body<-list(rfRFE=rfe,best=best,tolerance=tolerance)
#get top model variables
selected_rfe <- ocpu_rebuild_rfe(dave_ml_connection, body)
# Pull the rebuilt RFE object out of its session for local use.
.selected_rfe <- get_ocpu_obj(ocpu_session(selected_rfe), dave_ml_connection$url)
library(dave.ml.app)
#collect all required args
# Selection settings appended to the RFE object before it is summarised.
rfe_select_args <- list(
  best_subset = best,
  metric = metric,
  tolerance = NULL,
  func = func
)
do.call('summary.model_rfe',list(c(.selected_rfe,rfe_select_args)))
[1] "Recursive feature elimination (RFE) was used to identify top predictive variables. Backward and forward variable selection was done to optimize model Kappa using rfFuncs, validated with for folds repeated times while conducting round(s) of model parameter tuning. The final model was selected based on the best subset method. The selected model contains 16 variable(s) and has Kappa equal to 0.701 +/- 0.163. The top 5 variables include: var188, var173, var51, var15 and var34."
# Plot the selected RFE result for the chosen metric.
x <- dave.ml.app::plot.model_rfe(.selected_rfe, metric = metric, print = FALSE)
ggplotly(x)
# Example arguments for an identifier translation request.
#
# Returns a list with:
#   url  - the OpenCPU base URL string
#   body - list(id, from, to, db_name, from_obj); `id` is a character vector of
#          identifiers to translate from "KEGG" to "KEGG" and "InChIKey". Empty
#          strings are kept as-is.
# NOTE(review): one id ("C02679 ") carries a trailing space in the source; it is
# reproduced unchanged - confirm whether the service trims it.
trans_args <- function() {
  list(
    url = "http://ec2-107-21-83-113.compute-1.amazonaws.com:81/ocpu/",
    body = list(
      id = c("C00310", "C00379", "C01762", "C00385", "C00183",
             "C02067", "C00299", "C00366", "C00086", "C00106", "C00082",
             "C00078", "", "C06771", "C01083", "C01157", "C02477", "C00178",
             "C00214", "C00188", "", "", "C00245", "D09007", "C00059",
             "C00089", "C00042", "C01530", "C00794", "C08250", "C00493",
             "C00065", "C00213", "C00818", "C00199", "C00121", "C00474",
             "C00507", "C00022", "C00013", "", "C02457", "C00148", "",
             "C00009", "C00346", "", "C00079", "C07086", "C01601", "C00864",
             "C08362", "C00249", "C01879", "C00209", "C00295", "C00077",
             "C00712", "C02721", "C00253", "C00153", "", "", "C00624",
             "", "C00645", "C03136", "", "C02989", "C00073", "", "", "C00208",
             "C00149", "C07272", "", "C00047", "C06427", "C01595", "C01725",
             "", "C02679 ", "", "C00186", "C00328", "", "C00407", "C00311",
             "", "", "C00294", "C02043", "C00954", "C16526", "C00262",
             "C00192", "", "C05629", "C00263", "C01817", "C00135", "",
             "C00387", "C00160", "C00037", "C00093", "C05401", "C00258",
             "C00489", "C00064", "C00025", "C00191", "C00092", "C00031",
             "C00257", "", "C00446", "C00124", "C00880", "C01235", "C00122",
             "C01018", "C01019", "C00085", "C00095", "C00189", "C00503",
             "C08374", "", "C00112", "", "C00097", "C00791", "", "C00327",
             "C00158", "C00695", "C00187", "", "C06423", "C01571", "C03044",
             "C00099", "C00180", "C08281", "C00049", "C00152", "", "C06425",
             "C01904", "C00259", "C00872", "C00026", "C01551", "C00499",
             "C00041", "C06104", "C00020", "C00212", "C00417", "C05659",
             "", "C01015", "C07588", "C00989", "C00156", "", "C00197",
             "", "C01013", "C01089", "", "C06474", "", "C00629", "C02112",
             "C00233", "", "C02630", "C05984", "", "", "C00956", "", "D01947",
             "", "C01885", "C07326"),
      from = "KEGG",
      to = c("KEGG", "InChIKey"),
      db_name = "/ctsgetr/inst/ctsgetr.sqlite",
      from_obj = "KEGG"
    )
  )
}
# Post the translation arguments to the CTSgetR function on the ctsgetr
# endpoint and return the result value.
body <- trans_args()$body
x <- ocpu_post(
  ctsgetr_connection,
  body = body,
  pkg_url = 'ocpu/library/CTSgetR/R/CTSgetR',
  encode = 'json',
  return_value = TRUE
)
No encoding supplied: defaulting to UTF-8.
head(x$results)
id InChIKey KEGG
1 C00310 LQXVFWRQNMEDEE-OVEKKEMJSA-N C00310
2 C00379 HEBKCHPVOIAQTA-SCDXWVJYSA-N C00379
3 C01762 UBORTCNDUKBEOP-UUOKFMHZSA-N C01762
4 C00385 LRFVTYWOQMYALW-UHFFFAOYSA-N C00385
5 C00183 KZSNJWFQEVHDMF-BYPYZUCNSA-N C00183
6 C02067 PTJWIQPHWPFNBW-GBNDHIKLSA-N C02067
dave_network_col_meta$id, see dave_network_col_meta to view the corresponding molecule names.
library(dave.network.app)
data("dave_network")
data("dave_network_col_meta")
# Metabolic network request with type 'CID': the 'CID' column holds the
# identifiers and 'id' is the network index.
data <- dave_network_col_meta
idcolumn <- 'CID'
net_index <- 'id'
type <- 'CID'
DB <- '/open_cpu/CID.SDF.DB'
body <- list(data = data,
             idcolumn = idcolumn,
             type = type,
             net_index = net_index,
             DB = DB)
# The result is kept under both names: `net` and `structural_net`.
net <- structural_net <-
  ocpu_metabolic_network(dave_network_connection, body = body, return_value = TRUE)
lapply(net$results,head)
$nodes
ID name KEGG CID id
1 var1 xylulose C00310 5289590 1
2 var2 xylitol C00379 6912 2
3 var3 xanthosine C01762 64959 3
4 var4 xanthine C00385 1188 4
5 var5 valine C00183 6287 5
6 var6 uridine C02067 15047 6
$edges
from to value value.1 type
176 153 27 0.7027027 0.7027027 CID
351 153 74 0.6304348 0.6304348 CID
352 27 74 0.6923077 0.6923077 CID
526 153 39 0.6486486 0.6486486 CID
527 27 39 0.6250000 0.6250000 CID
528 74 39 0.5609756 0.5609756 CID
$node_mapping
NULL
$vis_config
NULL
# Metabolic network request with type 'KEGG': the 'KEGG' column holds the
# identifiers and 'id' is the network index.
idcolumn <- 'KEGG'
net_index <- 'id'
type <- 'KEGG'
DB <- '/open_cpu/CID.SDF.DB'
body <- list(data = data,
             idcolumn = idcolumn,
             type = type,
             net_index = net_index,
             DB = DB)
# The result is kept under both names: `net` and `biochem_net`.
net <- biochem_net <-
  ocpu_metabolic_network(dave_network_connection, body = body)
lapply(net$results,head)
$nodes
ID name KEGG CID id
1 var1 xylulose C00310 5289590 1
2 var2 xylitol C00379 6912 2
3 var3 xanthosine C01762 64959 3
4 var4 xanthine C00385 1188 4
5 var5 valine C00183 6287 5
6 var6 uridine C02067 15047 6
$edges
from to value value.1 type
1 1 2 1 1 KEGG
2 1 150 1 1 KEGG
3 4 3 1 1 KEGG
4 36 3 1 1 KEGG
5 103 3 1 1 KEGG
6 95 4 1 1 KEGG
$node_mapping
NULL
$vis_config
NULL
library(dave.network.app)
data("dave_network")
tmp_data <- dave_network
# Correlation matrix request: spearman correlations with cor_FDR = TRUE and a
# cutoff of 0.05.
body <- list(
  data = tmp_data,
  cor_method = 'spearman',
  cor_FDR = TRUE,
  cor_cutoff = .05,
  lambda = NULL,
  huge_method = 'manual'
)
x <-
  ocpu_get_cor_mat(dave_network_connection,
                   body = body)
# Request body for the huge object endpoint.
body <-
  list(
    data = tmp_data,
    nlambda = 10,
    paranorm = TRUE,
    lambda.min.ratio = .1,
    check.cor = FALSE
  )
huge_obj <-
  ocpu_get_huge_obj(dave_network_connection,
                    body = body,
                    return_value = TRUE)
# opt='manual' and specify a lambda to manually tune the network connection strength.
body <-
  list(
    huge_obj = huge_obj,
    lambda = NULL, #round(huge_obj$results$lambda[5],4), # legacy shenanigans for manual
    opt = 'stars'
  )
# Extract the network edges from the huge object.
huge_edges <-
  ocpu_get_huge_edges(dave_network_connection,
                      body = body)
head(huge_edges$results)
source target value
1 2 1 1
2 24 1 1
3 30 1 1
4 36 1 1
5 39 1 1
6 79 1 1
#load calculated objects for the network nodes
library(dave.stat)
data("dave_stat_col_meta")
#join external analysis results with a calculated network
network_obj <- biochem_net$results
#match internal indicies
# Strip the 'var' prefix from ID so it can be joined against the node id column.
dave_stat_col_meta$id <- gsub('var', '', dave_stat_col_meta$ID)
network_obj$nodes <-
  left_join(network_obj$nodes, dave_stat_col_meta, by = 'id')
#create mappings for node attributes
FC <-
  map_fold_change(network_obj$nodes$FC_non.diabetic.._diabetic.., transform = 'log2')$results
# Recode the direction values 1 / 0 / -1 as labelled factor levels.
FC$'_dir' <- factor(FC$'_dir', labels = c('increased', 'unchanged', 'decreased'), levels = c(1, 0, -1))
# cbind with FC = FC prefixes the new columns, e.g. 'FC._dir' as referenced below.
network_obj$nodes <- cbind(network_obj$nodes, FC = FC)
network_obj$node_mapping <- list(
  node_col = 'FC._dir',
  node_size = 'FC._mag',
  node_label = 'name.y',
  node_size_range = c(3, 7)
)
x <- do.call('network.visualize', network_obj)
# network.visnetwork(x)
ggplotly(dave.network.app:::plot.network_viz(x))
library(dave.ocpu.client)
source(system.file('app/src/API.R',package='dave.app'))
DAVe API endpoints only require a body argument and you can see all the created asset paths by setting return_value = FALSE.
data("mtcars")
body <- list(x = mtcars)
pkg_url <- 'ocpu/library/base/R/colnames' #this will call base::colnames
# return_value = TRUE: take the computed value from the $results element.
res0 <- ocpu_post(
  dave_ml_connection,
  body = body,
  pkg_url = pkg_url,
  encode = 'json',
  return_value = TRUE
)$results
res0
[1] "mpg" "cyl" "disp" "hp" "drat" "wt" "qsec" "vs" "am" "gear" "carb"
# Same call with return_value = FALSE: the response lists the session and the
# created asset paths instead of the value.
res1 <- ocpu_post(
  dave_ml_connection,
  body = body,
  pkg_url = pkg_url,
  encode = 'json',
  return_value = FALSE
)
cat('Add the "$paths" below to the following url to view the results:')
Add the "$paths" below to the following url to view the results:
cat(dave_ml_connection$url)
http://ec2-52-22-43-130.compute-1.amazonaws.com/ml/
res1
$meta
$meta$session
[1] "x059dd61ae4576c"
$meta$status
[1] 201
$paths
[1] "/ocpu/tmp/x059dd61ae4576c/R/.val" "/ocpu/tmp/x059dd61ae4576c/R/colnames" "/ocpu/tmp/x059dd61ae4576c/R/x" "/ocpu/tmp/x059dd61ae4576c/stdout"
[5] "/ocpu/tmp/x059dd61ae4576c/source" "/ocpu/tmp/x059dd61ae4576c/console" "/ocpu/tmp/x059dd61ae4576c/info" "/ocpu/tmp/x059dd61ae4576c/files/DESCRIPTION"
get_ocpu_obj(ocpu_session(res1) ,dave_ml_connection$url)
[1] "mpg" "cyl" "disp" "hp" "drat" "wt" "qsec" "vs" "am" "gear" "carb"
# Pass the previous response (res1) as the `x` argument of base::paste, using
# 'form' encoding.
body <- list(x = res1, collapse = '|')
pkg_url <- 'ocpu/library/base/R/paste'
ocpu_post(
  dave_ml_connection,
  body = body,
  pkg_url = pkg_url,
  encode = 'form',
  return_value = TRUE
)$results
[1] "mpg|cyl|disp|hp|drat|wt|qsec|vs|am|gear|carb"
ocpuclient::get_ocpu_list_item is useful for selecting specific named list elements from a previous session's results.
#get a specific element from a previous session either by list index or name
# Template request body: replace both placeholders with the object and the
# list item to select.
body <- list(obj = '<NAME of OBJECT>',
             name = '<NAME of LIST ITEM>')
get_ocpu_list_item(dave_ml_connection ,body)