%%%%%%%%%%%% cd ~/Files/Creations/R/widals/inst/doc ; R64 CMD Sweave funwithfunload.Snw

\documentclass[11pt]{article}
\usepackage{graphicx}
\usepackage{amssymb}
\usepackage{epstopdf}
\usepackage{caption}
\usepackage{amsmath, amsthm} %%%%%%%%%%%%%%% MUST BE ADDED
%\usepackage{supertabular}
%\usepackage{wasysym}
\usepackage{setspace}

\usepackage{Sweave}

\usepackage{tabularx}
\newcolumntype{Y}{>{\footnotesize\raggedright\arraybackslash}X}

%\singlespacing
\onehalfspacing
%\doublespacing

\usepackage{natbib}

%\usepackage{color}
%\definecolor{MyDarkGreen}{rgb}{0.0,0.4,0.0}
%\definecolor{MyDarkRed}{rgb}{0.4,0.0,0.0} 
%\usepackage[colorlinks=true, urlcolor= MyDarkGreen, linkcolor= MyDarkRed ]{hyperref}
\usepackage{hyperref}

\DeclareCaptionLabelSeparator{space}

\DeclareGraphicsRule{.tif}{png}{.png}{`convert #1 `dirname #1`/`basename #1 .tif`.png}
\textwidth = 6.5 in
\textheight = 9 in
\oddsidemargin = 0.0 in
\evensidemargin = 0.0 in
\topmargin = 0.0 in
\headheight = 0.0 in
\headsep = 0.0 in
\parskip = 0.2in
\parindent = 0.0in
\newtheorem{theorem}{Theorem}
\newtheorem{corollary}[theorem]{Corollary}
\newtheorem{definition}{Definition}


\newcommand{\ve}{\varepsilon}


\newcommand{\wt}{\widetilde}
\newcommand{\wh}{\widehat}
\newcommand{\0}{\mathbf{0}}


\newcommand{\st}{\mathrm{ \:\: s.t. \:\: }}

\newcommand{\bs}[1]{\boldsymbol{#1}}
\newcommand{\bm}[1]{\mbox{\boldmath$#1$}}
\newcommand{\mr}[1]{\mathrm{#1}}
\newcommand{\mb}[1]{\mathbf{#1}}

\newcommand{\smss}[1]{^{_{#1}}}

\newcommand{\apri}{\smss{\,(-)}}
\newcommand{\apos}{\smss{\,(+)}}

\newcommand{\betaup}{\rotatebox[origin=c]{12}{$\beta$}}

\newcommand{\ttb}{\hspace{-0.01cm}}

\newcommand{\diag}{\mathsf{diag}}
\newcommand{\minz}{\mathsf{min}}
\newcommand{\maxz}{\mathsf{max}}
\newcommand{\zsin}{\mathsf{sin}}
\newcommand{\zcos}{\mathsf{cos}}

\newcommand{\SE}{\mathsf{SE}}
\newcommand{\range}{\mathsf{range}}

\newcommand{\ndxrng}[2]{#1 \,\!\! : \,\!\! #2}

\newenvironment{DZcaption}[2]%
               {\begin{list}{}{\leftmargin#1\rightmargin#2}\item{}}%
               {\end{list}}


%\VignetteIndexEntry{Package \texttt{widals}: Fun with \texttt{fun.load()}}


\begin{document}

\title{Package \texttt{widals}: Fun with \texttt{fun.load()}}
\author{Dave Zes}
\maketitle


\section{Intro}


\texttt{widals} Users are encouraged to create their own \texttt{fun.load} functions to suit particular situations.

Here, we will create a function called \texttt{fun.load.widals.ab}; very close kin to the stock \texttt{fun.load.widals.a}, except that it includes an \emph{extra} ALS stage that will run in series with WIDALS.  Since the stock WIDALS functions, e.g., \texttt{widals.snow}, fit/predict using an initial ALS stage followed in series with the stochastic adjustment, the \texttt{fun.load.widals.ab} solving method could be notated as 

ALS1 $\longrightarrow$ ALS2 $\longrightarrow$ Crispify


When using the Ozone data, there is no real evidence that the additional ALS stage --- as constructed --- offers any improvement.  In fact, the RMSE increases slightly.  The advantage of including additional bases-transform covariates is more commonly realized when many sensors are present.  In Section \ref{SecSim}, where we simulate a composite system, we will find a small reduction in CV RMSE over the single stage ALS.


\newpage

\section{WIDALS Review}

We'll work with the Californis Ozone demonstration data set obtained from CARB \cite{CARB}.

Important: It is intended that the User run through \emph{all} the code in this Section.

Ready data and covariates:


<<eval=TRUE, echo=FALSE>>=
options(width=49)
options(prompt=" ")
options(continue="   ")
@


<<eval=FALSE, results=hide>>=
options(stringsAsFactors=FALSE)

library(snowfall)

k.cpus <- 2 #### set the number of cpus for snowfall

library(widals)
data(O3)

Z.all <- as.matrix(O3$Z)[366:730, ]
locs.all <- O3$locs[ , c(2,1)]
hsa.all <- O3$helevs/500

xdate <- rownames(Z.all)

tau <- nrow(Z.all)
n.all <- ncol(Z.all)

xgeodesic <- TRUE

Z <- Z.all
locs <- locs.all
n <- n.all

dateDate <- strptime(xdate, "%Y%m%d")
doy <- as.integer(format(dateDate, "%j"))

Ht <- cbind( sin(2*pi*doy/365), cos(2*pi*doy/365) )
Hs.all <- cbind(matrix(1, nrow=n.all), hsa.all)
Hisa.ls <- H.Earth.solar(locs[ , 2], locs[ , 1], dateDate)

Hst.ls.all2 <- list()
for(tt in 1:tau) {
    Hst.ls.all2[[tt]] <- cbind(Hisa.ls[[tt]], Hisa.ls[[tt]]*hsa.all)
    colnames(Hst.ls.all2[[tt]]) <- c("ISA", "ISAxElev")
}
Hst.ls <- Hst.ls.all2
Hs <- Hs.all
Ht.original <- Ht


train.rng <- 30:tau
test.rng <- train.rng

k.glob <- 10
run.parallel <- TRUE
@


Assign the necessary parameters:

<<eval=FALSE, results=hide>>=


FUN.source <- fun.load.widals.a

d.alpha.lower.limit <- 0
rho.upper.limit <- 100
rgr.lower.limit <- 10^(-7)

GP <- c(1/10, 1, 0.01, 3, 1)

############# pseudo cross-validation
rm.ndx <- 1:n
cv <- -2
lags <- c(0)
b.lag <- -1

sds.mx <- seq(2, 0.01, length=k.glob) * matrix(1, k.glob, length(GP))
ltco <- -10
stnd.d <- TRUE

FUN.GP <- NULL

sfInit(TRUE, k.cpus)
FUN.source()

set.seed(99999)
MSS.snow(FUN.source, NA, p.ndx.ls, f.d, sds.mx=sds.mx,
k.glob, k.loc.coef=7, X = NULL)
sfStop()
#### 11.90536
@


<<eval=FALSE, results=hide>>=
k.glob <- 10

FUN.source <- fun.load.widals.a

GP <- c(1/10, 1, 0.01, 3, 1)

######### true spacial cross-validation
rm.ndx <- create.rm.ndx.ls(n, 14)
cv <- 2
lags <- c(0)
b.lag <- 0

sds.mx <- seq(2, 0.01, length=k.glob) * matrix(1, k.glob, length(GP))
ltco <- -10

FUN.GP <- NULL

sfInit(TRUE, k.cpus)
FUN.source()

set.seed(99999)
MSS.snow(FUN.source, NA, p.ndx.ls, f.d, sds.mx=sds.mx,
k.glob, k.loc.coef=7, X=NULL)
sfStop()
#### 12.11686
@


We will now invite the requirement of an additional ALS stage by introducing spacial covariates in the form of a bases expansion (generally, see \cite{Wasserman, Efrom, WaveletsAbram}, specifically, \cite{FRK}).  For this we'll call upon the excellent package \texttt{LatticeKrig} by Professor Nychka and friends \cite{LatticeKrig}.


<<eval=FALSE, results=hide>>=
library(LatticeKrig)

xxb <- 4
yyb <- 4
center <- as.matrix(expand.grid( seq(0, 1, length=xxb), seq(0, 1, length=yyb)))

###### run together
ffunit <- function(x) { return( (x-min(x)) / (max(x)-min(x)) ) }
locs.unit <- apply(locs, 2, ffunit)
locs.unit <- matrix(as.vector(locs.unit), ncol=2)
###### run togther

xPHI <- Radial.basis(as.matrix(locs.unit), center, 0.5)
Hs.lkrig <- matrix(NA, nrow(locs), xxb*yyb)
for(i in 1:nrow(locs)) {
    Hs.lkrig[i, ] <- xPHI[i]
}
Hs.lkrig <- Hs.lkrig[ , -which( apply(Hs.lkrig, 2, sum) < 0.1 ), drop=FALSE ]
@


We now create two sets of covariates.  

The first (prefixed \texttt{XA.}) will house the usual goods: constant term, elevation, seasonal, ISA, and the ISA-elevation interaction.

The second (prefixed \texttt{XB.}) will house the bases transform.


<<eval=FALSE, results=hide>>=
XA.Hs <- cbind(rep(1,n), hsa.all)
XA.Ht <- Ht.original
XA.Hst.ls <- Hst.ls

Hst.sumup(XA.Hst.ls, XA.Hs, XA.Ht)


XB.Hs <- 10*Hs.lkrig
XB.Ht <- NULL
XB.Hst.ls <- NULL

Hst.sumup(XB.Hst.ls, XB.Hs, XB.Ht)
@


\newpage

Our custom function:

{\scriptsize
<<eval=FALSE, results=hide>>=
fun.load.widals.ab <- function() {
     
    if( run.parallel ) {
        sfExport("Z", "XA.Hs", "XA.Ht", "XA.Hst.ls", "XB.Hs", "XB.Ht", "XB.Hst.ls", 
        "locs", "lags", "b.lag", "cv", "rm.ndx", "train.rng", "test.rng", "xgeodesic", 
        "ltco", "stnd.d")
        suppressWarnings(sfLibrary(widals))
    }
    
    if( length(lags) == 1 & lags[1] == 0 ) {
        p.ndx.ls <- list( c(1,2), c(3,4), c(5,7) )
    } else {
        p.ndx.ls <- list( c(1,2), c(3,4), c(5,6,7) )
    }
    assign( "p.ndx.ls", p.ndx.ls, pos=globalenv() )
    
    f.d <- list( dlog.norm, dlog.norm, dlog.norm, dlog.norm, dlog.norm, 
    dlog.norm, dlog.norm )
    assign( "f.d", f.d, pos=globalenv() )
    
    FUN.MH <- function(jj, GP.mx, X) {
        
        if(cv==2) { ZhalsA <- Hals.fastcv.snow(jj, rm.ndx, Z, XA.Hs, XA.Ht, XA.Hst.ls, GP.mx) }
        if(cv==-2) { ZhalsA <- Hals.snow(jj, Z, XA.Hs, XA.Ht, XA.Hst.ls, b.lag, GP.mx) }
        Z.resids.A <- Z - ZhalsA
        
        Z.wid <- widals.snow(jj, rm.ndx=rm.ndx, Z=Z.resids.A, Hs=XB.Hs, Ht=XB.Ht, Hst.ls=XB.Hst.ls, 
        locs=locs, lags=lags, b.lag=b.lag, cv=cv, geodesic=xgeodesic,
        wrap.around=NULL, GP.mx[ , c(3:7), drop=FALSE], stnd.d=stnd.d, ltco=ltco)
        
        Z.wid <- Z.wid + ZhalsA
         
        if( min(Z, na.rm=TRUE) >= 0 ) { Z.wid[ Z.wid < 0 ] <- 0 } ####### DZ EDIT
        
        Z.wid <- Z.clean.up(Z.wid)
        
        resids <- Z[ , unlist(rm.ndx)] - Z.wid[ , unlist(rm.ndx)]
        our.cost <- sqrt( mean( resids[ train.rng, ]^2 ) )
        
        if( is.nan(our.cost) ) { our.cost <- Inf }
        
        return( our.cost )
    }
    assign( "FUN.MH", FUN.MH, pos=globalenv() )
    
    #FUN.GP <- NULL
    FUN.GP <- function(GP.mx) {
        GP.mx[ GP.mx[ , 1] > rho.upper.limit, 1 ] <- rho.upper.limit
        GP.mx[ GP.mx[ , 2] < rgr.lower.limit, 2 ] <- rgr.lower.limit
        GP.mx[ GP.mx[ , 2] > rgr.upper.limit, 2 ] <- rgr.upper.limit
        
        GP.mx[ GP.mx[ , 3] > rho.upper.limit, 3 ] <- rho.upper.limit
        GP.mx[ GP.mx[ , 4] < rgr.lower.limit, 4 ] <- rgr.lower.limit
        GP.mx[ GP.mx[ , 4] > rgr.upper.limit, 4 ] <- rgr.upper.limit

        GP.mx[ GP.mx[ , 5] < d.alpha.lower.limit, 5 ] <- d.alpha.lower.limit
        xperm <- order(GP.mx[ , 5,  drop=FALSE])
        GP.mx <- GP.mx[ xperm,  ,  drop=FALSE]
        return(GP.mx)
    }
    assign( "FUN.GP", FUN.GP, pos=globalenv() )
    
    FUN.I <- function(envmh, X) {
        cat( "Improvement ---> ", envmh$current.best, " ---- " , envmh$GP, "\n" )
    }
    assign( "FUN.I", FUN.I, pos=globalenv() )
    
    FUN.EXIT <- function(envmh, X) {
        
        GP.mx <- matrix(envmh$GP, 1, length(envmh$GP))
        
        if(cv==2) { ZhalsA <- Hals.fastcv.snow(1, rm.ndx, Z, XA.Hs, XA.Ht, XA.Hst.ls, GP.mx) }
        if(cv==-2) { ZhalsA <- Hals.snow(1, Z, XA.Hs, XA.Ht, XA.Hst.ls, b.lag, GP.mx) }
        
        Z.resids.A <- Z - ZhalsA
        
        Z.wid <- widals.snow(1, rm.ndx=rm.ndx, Z=Z.resids.A, Hs=XB.Hs, Ht=XB.Ht, Hst.ls=XB.Hst.ls, 
        locs=locs, lags=lags, b.lag=b.lag, cv=cv, geodesic=xgeodesic,
        wrap.around=NULL, GP.mx[ , c(3:7), drop=FALSE], stnd.d=stnd.d, ltco=ltco)
        
        Z.wid <- Z.wid + ZhalsA
       
        if( min(Z, na.rm=TRUE) >= 0 ) { Z.wid[ Z.wid < 0 ] <- 0 } ############ DZ EDIT
        
        assign( "Z.wid", Z.wid, envir=globalenv() )
        
        Z.wid <- Z.clean.up(Z.wid)
        
        resids <- Z[ , unlist(rm.ndx)] - Z.wid[ , unlist(rm.ndx)]
        our.cost <- sqrt( mean( resids[ test.rng, ]^2 ) )
        
        if( is.nan(our.cost) ) { our.cost <- Inf }
        
        cat( envmh$GP, " -- ", our.cost,   "\n" )
        
        assign( "our.cost", our.cost, pos=globalenv() )
        assign( "GP", envmh$GP, pos=globalenv() )
        cat( paste( "GP <- c(", paste(format(GP,digits=5), collapse=", "), ") ### ", 
        format(our.cost, width=6), "\n", sep="" ) )
    }
    assign( "FUN.EXIT", FUN.EXIT, pos=globalenv() )
}
@
}


Pseudo CV:

<<eval=FALSE, results=hide>>=
GP <- c(1/10, 1, 1/10, 1,      5, 3, 1)
sds.mx <- seq(2, 0.01, length=k.glob) * matrix(1, k.glob, length(GP))
ltco <- -10
stnd.d <- TRUE

rm.ndx <- I(1:n)
cv <- -2
lags <- c(0)
b.lag <- -1

d.alpha.lower.limit <- 0
rho.upper.limit <- 100
rgr.lower.limit <- 10^(-7)
rgr.upper.limit <- 500

FUN.GP <- NULL

sfInit(TRUE, k.cpus)
FUN.source <- fun.load.widals.ab
FUN.source()

set.seed(9999)
MSS.snow(FUN.source, NA, p.ndx.ls, f.d, sds.mx=sds.mx,
k.glob, k.loc.coef=7, X = NULL)
sfStop()
#### 11.5234
@


Real sitewise CV:

<<eval=FALSE, results=hide>>=
GP <- c(1/10, 1, 1/10, 1,      0.5, 3, 1)
sds.mx <- seq(2, 0.01, length=k.glob) * matrix(1, k.glob, length(GP))
ltco <- -10
stnd.d <- TRUE

rm.ndx <- create.rm.ndx.ls(n, 14)
cv <- 2
lags <- c(0)
b.lag <- 0

d.alpha.lower.limit <- 0
rho.upper.limit <- 100
rgr.lower.limit <- 10^(-7)
rgr.upper.limit <- 500

FUN.GP <- NULL

sfInit(TRUE, k.cpus)
FUN.source <- fun.load.widals.ab
FUN.source()

set.seed(9999)
MSS.snow(FUN.source, NA, p.ndx.ls, f.d, sds.mx=sds.mx,
k.glob, k.loc.coef=9, X = NULL)
sfStop()
@


\section{Simulation} \label{SecSim}

<<eval=FALSE, results=hide>>=

options(stringsAsFactors=FALSE)

k.cpus <- 2 #### set the number of cpus for snowfall


library(widals)
tau <- 210
n.all <- 300

set.seed(77777)
locs.all <- cbind(runif(n.all), runif(n.all))

D.mx <- distance(locs.all, locs.all, FALSE)

Q <- 0.03*exp(-2*D.mx)
F <- 0.99
R <- diag(1, n.all)
beta0 <- rep(0, n.all)
H <- diag(1, n.all)

xsssim <- SS.sim(F, H, Q, R, length.out=tau, beta0=beta0)
Y1 <- xsssim$Y


Hst.ss <- list()
for(tt in 1:tau) {
Hst.ss[[tt]] <- cbind( rep(sin(tt*2*pi/tau), n.all), rep(cos(tt*2*pi/tau), n.all) )
colnames(Hst.ss[[tt]]) <- c("sinet", "cosinet")
}
Ht.original <- cbind( sin((1:tau)*2*pi/tau), cos((1:tau)*2*pi/tau) )

Q2 <- diag(0.03, ncol(Hst.ss[[1]]))
F2 <- 0.99
beta20 <- rep(0, ncol(Hst.ss[[1]]))
R2 <- 1*exp(-3*D.mx) + diag(0.001, n.all)

xsssim2 <- SS.sim.tv(F2, Hst.ss, Q2, R, length.out=tau, beta0=beta20)
Z2 <- xsssim2$Z


Z.all <- Y1 + Z2


#################### plot
z.min <- min(Z.all)
for(tt in 1:tau) {
   plot(locs.all, cex=(Z.all[ tt, ]-z.min)*0.3,   main=tt)
   Sys.sleep(0.1)
}
@


Just using temporal covariates:

<<eval=FALSE, results=hide>>=
train.rng <- 30:tau
test.rng <- train.rng


xgeodesic <- FALSE
Z <- Z.all
locs <- locs.all
n <- n.all
Ht <- Ht.original
Hs <- matrix(1, nrow=n)
Hst.ls <- NULL


rm.ndx <- create.rm.ndx.ls(n, 14)
k.glob <- 10
run.parallel <- TRUE

FUN.source <- fun.load.widals.a

d.alpha.lower.limit <- 0
rho.upper.limit <- 100
rgr.lower.limit <- 10^(-7)

GP <- c(1/10, 1, 0.01, 3, 1)
cv <- 2
lags <- c(0)
b.lag <- 0

sds.mx <- seq(2, 0.01, length=k.glob) * matrix(1, k.glob, length(GP))
ltco <- -10
stnd.d <- TRUE

FUN.GP <- NULL

sfInit(TRUE, k.cpus)
FUN.source()

set.seed(99999)
MSS.snow(FUN.source, NA, p.ndx.ls, f.d, sds.mx=sds.mx,
k.glob, k.loc.coef=7, X = NULL)
sfStop()
@


Now, using a second ALS stage for the bases transformation:

<<eval=FALSE, results=hide>>=
library(LatticeKrig)

xxb <- 7
yyb <- 7
center <- as.matrix(expand.grid( seq( 0, 1, length=xxb), seq( 0, 1, length=yyb)))

###### run together
ffunit <- function(x) { return( (x-min(x)) / (max(x)-min(x)) ) }
locs.unit <- apply(locs, 2, ffunit)
locs.unit <- matrix(as.vector(locs.unit), ncol=2)
###### run togther

xPHI <- Radial.basis(as.matrix(locs.unit), center, 0.5)
Hs.lkrig <- matrix(NA, nrow(locs), xxb*yyb)
for(i in 1:nrow(locs)) {
    Hs.lkrig[i, ] <- xPHI[i]
}

XA.Hs <- Hs
XA.Ht <- Ht.original
XA.Hst.ls <- NULL

Hst.sumup(XA.Hst.ls, XA.Hs, XA.Ht)

XB.Hs <- 10*Hs.lkrig
XB.Ht <- NULL
XB.Hst.ls <- NULL

Hst.sumup(XB.Hst.ls, XB.Hs, XB.Ht)

GP <- c(1/10, 1, 1/10, 1,      5, 3, 1)
sds.mx <- seq(2, 0.01, length=k.glob) * matrix(1, k.glob, length(GP))

rm.ndx <- create.rm.ndx.ls(n, 14)
cv <- 2
lags <- c(0)
b.lag <- 0

d.alpha.lower.limit <- 0
rho.upper.limit <- 100
rgr.lower.limit <- 10^(-7)
rgr.upper.limit <- 100

FUN.GP <- NULL

sfInit(TRUE, k.cpus)
FUN.source <- fun.load.widals.ab
FUN.source()

set.seed(9999)
MSS.snow(FUN.source, NA, p.ndx.ls, f.d, sds.mx=sds.mx,
k.glob, k.loc.coef=7, X = NULL)
sfStop()
@


%\section{Final Thoughts}


%\bibliographystyle{plainnat}

%\bibliographystyle{jes}

\bibliographystyle{abbrv}

%\bibliography{funwithfunload}
\bibliography{widals}


 \end{document}