\documentclass[article,nojss]{jss} % add nojss for vignette
%\VignetteIndexEntry{BVAR: Bayesian Vector Autoregressions with Hierarchical Prior Selection in R}
%\VignettePackage{BVAR}
%\VignetteKeyword{multivariate}
%\VignetteKeyword{time series}
%\VignetteKeyword{hierarchical}
%\VignetteKeyword{impulse responses}
%\VignetteKeyword{forecast}
%\VignetteKeyword{macroeconomics}
\SweaveOpts{engine=R, eps=FALSE, keep.source=TRUE, prefix.string=fig}
<>=
options(prompt = "R> ", continue = "+ ", width = 70, useFancyQuotes = FALSE)
@

%% -- LaTeX packages and custom commands ---------------------------------------

%% recommended packages
\usepackage{thumbpdf,lmodern}

%% custom packages
\usepackage{float}
\usepackage{graphicx}
\usepackage{mathtools}
\usepackage{multicol}
\usepackage{natbib}
\usepackage{amsmath,amssymb}
\usepackage{placeins}

%% new custom commands
\newcommand{\class}[1]{`\code{#1}'} % Hard-coded instead
\newcommand{\fct}[1]{\code{#1()}}

%% -- Article metainformation (author, title, ...) -----------------------------
%% - \author{} with primary affiliation
%% - \Plainauthor{} without affiliations
%% - Separate authors by \And or \AND (in \author) or by comma (in \Plainauthor).
%% - \AND starts a new line, \And does not.
\author{Nikolas Kuschnig \\ WU Vienna University \\ of Economics and Business % \And
\And Lukas Vashold \\ WU Vienna University \\ of Economics and Business}
\Plainauthor{Nikolas Kuschnig, Lukas Vashold}

%% - \title{} in title case
%% - \Plaintitle{} without LaTeX markup (if any)
%% - \Shorttitle{} with LaTeX markup (if any), used as running title
\title{\pkg{BVAR}: Bayesian Vector Autoregressions with Hierarchical Prior Selection in \proglang{R}}
\Plaintitle{BVAR: Bayesian Vector Autoregressions with Hierarchical Prior Selection in R}
\Shorttitle{\pkg{BVAR}: Hierarchical Bayesian VARs in \proglang{R}}

% \Volume{1}
% \Issue{1}
% \Month{1}
% \Year{1}
% \Submitdate{2019-06-30}
% \Acceptdate{1970-01-01}

%% - \Abstract{} almost as usual
\Abstract{
Vector autoregression (VAR) models are widely used for multivariate time series analysis in macroeconomics, finance, and related fields. Bayesian methods are often employed to deal with their dense parameterization, imposing structure on model coefficients via prior information. The optimal choice of the degree of informativeness implied by these priors is the subject of much debate and can be approached via hierarchical modeling. This paper introduces \pkg{BVAR}, an \proglang{R} package dedicated to the estimation of Bayesian VAR models with hierarchical prior selection. It implements functionalities and options that permit addressing a wide range of research problems, while retaining an easy-to-use and transparent interface. Features include structural analysis via impulse responses, forecasting, the most commonly used conjugate priors, as well as a framework for defining custom dummy-observation priors. \pkg{BVAR} makes Bayesian VAR models user-friendly and provides an accessible reference implementation.
}

%% - \Keywords{} with LaTeX markup, at least one required
%% - \Plainkeywords{} without LaTeX markup (if necessary)
%% - Should be comma-separated and in sentence case.
\Keywords{vector autoregression (VAR), multivariate, time series, macroeconomics, structural analysis, hierarchical model, forecast, impulse response, identification, Minnesota prior, FRED-MD, Bayesian econometrics}
\Plainkeywords{vector autoregression (VAR), multivariate, time series, macroeconomics, structural analysis, hierarchical model, forecast, impulse response, identification, Minnesota prior, FRED-MD, Bayesian econometrics}

%% - \Address{} of at least one author
%% - May contain multiple affiliations for each author
%%   (in extra lines, separated by \emph{and}\\).
%% - May contain multiple authors for the same affiliation
%%   (in the same first line, separated by comma).
\Address{
Nikolas Kuschnig \\
Vienna University of Economics and Business \\
Institute for Macroeconomics \emph{and} \\
Institute for Ecological Economics \\
Welthandelsplatz 1 \\
1020 Vienna, Austria \\
E-mail: \email{nikolas.kuschnig@wu.ac.at} \\
URL: \url{https://kuschnig.eu/} \\\\
Lukas Vashold \\
Vienna University of Economics and Business \\
Institute for Macroeconomics \\
Welthandelsplatz 1 \\
1020 Vienna, Austria \\
E-mail: \email{lukas.vashold@wu.ac.at}
}

\begin{document} % hi mom
\SweaveOpts{concordance=FALSE}

%% -- Introduction -------------------------------------------------------------
%% - In principle "as usual".
%% - But should typically have some discussion of both _software_ and _methods_.
%% - Use \proglang{}, \pkg{}, and \code{} markup throughout the manuscript.
%% - If such markup is in (sub)section titles, a plain text version has to be
%%   added as well.
%% - All software mentioned should be properly \cite-d.
%% - All abbreviations should be introduced.
%% - Unless the expansions of abbreviations are proper names (like "Journal
%%   of Statistical Software" above) they should be in sentence case (like
%%   "generalized linear models" below).

\section{Introduction} \label{sec:intro}

% Introduce
Vector autoregression (VAR) models, popularized by \cite{sims1980}, have become a staple of empirical macroeconomic research \citep{kilian2017}. They are widely used for multivariate time series analysis and have been applied to evaluate macroeconomic models \citep{delnegro2007}, investigate the effects of monetary policy \citep{bernanke2005, sims2006}, and conduct forecasting exercises \citep{litterman1986, koop2013}. Their large number of parameters and the limited temporal availability of macroeconomic datasets often lead to over-parameterization problems \citep{koop2010} that can be mitigated by introducing prior information within a Bayesian framework. Informative priors are used to impose additional structure on the model and push it towards proven benchmarks. The resulting models display reduced parameter uncertainty and significantly enhanced out-of-sample forecasting performance \citep{koop2013}. However, the specific choice and parameterization of these priors pose a challenge that remains at the heart of both discussion and critique. A number of heuristics for prior selection have been proposed in the literature. \cite{giannone2015} tackle this problem by setting prior informativeness in a data-based fashion, in the spirit of hierarchical modeling. Their flexible approach alleviates the subjectivity of setting prior parameters and explicitly acknowledges uncertainty surrounding these choices. The conjugate model setup allows for efficient estimation and has been shown to perform remarkably well in common analyses.
% Provide background, narrow down domain
With the rise of Markov chain Monte Carlo (MCMC) methods, Bayesian statistical software has evolved rapidly. Established software provides flexible and extensible tools for Bayesian inference, which are available cross-platform. This includes \pkg{BUGS} \citep{lunn2000, lunn2009} and \pkg{JAGS} \citep{plummer2003}, which build on the Gibbs sampler, \pkg{Stan} \citep{carpenter2017}, which builds on the Hamiltonian Monte Carlo algorithm, as well as \pkg{R-INLA} \citep{rue2015}, for approximate inference. Domain-specific inference is facilitated by specialized packages, such as \pkg{MCMCglmm} \citep{hadfield2010} and \pkg{brms} \citep{burkner2017, burkner2018} for the \proglang{R} language \citep{R}. In the domain of multivariate time series analysis, the \proglang{R} package \pkg{vars} \citep{pfaff2008} represents a cornerstone. It offers a comprehensive set of frequentist VAR-related functionalities, including the calculation and visualization of forecasts, impulse responses, and forecast error variance decompositions. Other related packages include \pkg{MTS} \citep{tsay2018}, \pkg{BigVAR} \citep{nicholson2019}, and \pkg{tsDyn} \citep{dinarzo2020}, rounding out a powerful and mature assortment of software.

% Bayesian VAR domain
Currently, there exists no equivalent to \pkg{vars} for Bayesian VAR models in \proglang{R}. Applied work is often performed via ad hoc scripts, compromising reproducibility. Some \proglang{R} packages provide specialized implementations of Bayesian VAR models, but lack flexibility and accessibility. The \pkg{bvarsv} package \citep{krueger2015} implements estimation of a model with time-varying parameters and stochastic volatility by \cite{primiceri2005}. \pkg{mfbvar}, by \cite{ankargren2020}, implements estimation of mixed-frequency VAR models and provides forecasting routines. Several common prior distributions as well as stochastic volatility methods are available, but functions for structural analysis and inference are lacking. Another approach is taken by the \pkg{bvartools} package \citep{mohr2020}, which provides functions to assist with Bayesian inference in VAR models, but does not include routines for estimation. Despite the popularity of Bayesian VAR models, there remains a considerable gap between specialized Bayesian implementations and accessible, all-purpose ones.

% In comes BVAR
In this paper, we present \pkg{BVAR} \citep{kuschnig2021}, a comprehensive and user-friendly \proglang{R} package for the estimation and analysis of Bayesian VAR models. It implements a hierarchical modeling approach to prior selection in the fashion of \cite{giannone2015}. Functionalities to facilitate most common analyses are provided. Standard methods and interfaces to existing frameworks ensure accessibility and extensibility. \pkg{BVAR} is free software, licensed under the GNU General Public License 3, openly available and developed online.\footnote{See the Comprehensive \proglang{R} Archive Network (CRAN, \url{https://CRAN.R-project.org/package=BVAR}) and GitHub (\url{https://github.com/nk027/bvar}).}

The remainder of this paper is structured as follows. Section~\ref{sec:econ} describes the econometric framework used in the package. Section~\ref{sec:bvar} provides an overview of \pkg{BVAR} and its functionalities, with their usage demonstrated by means of an example in Section~\ref{sec:demo}. Section~\ref{sec:concl} concludes. Additional information is provided in the Appendix.
%% -- Manuscript ---------------------------------------------------------------
%% - In principle "as usual" again.
%% - When using equations (e.g., {equation}, {eqnarray}, {align}, etc.
%%   avoid empty lines before and after the equation (which would signal a new
%%   paragraph.
%% - When describing longer chunks of code that are _not_ meant for execution
%%   (e.g., a function synopsis or list of arguments), the environment {Code}
%%   is recommended. Alternatively, a plain {verbatim} can also be used.
%%   (For executed code see the next section.)

\newpage % sed_delete - hard-formatted
\section{Econometric framework} \label{sec:econ}

% Introduce econometric background
\pkg{BVAR} takes a Bayesian hierarchical modeling approach to VAR models. This section introduces the model, prior specification, and the hierarchical prior selection procedure proposed by \cite{giannone2015}. For further information on VAR models, the Bayesian approach to them, as well as Bayesian estimation and inference in general, we refer the interested reader to \cite{kilian2017}, \cite{koop2010}, and \cite{gelman2013}, respectively.

\subsection{Model specification} \label{subsec:model}

% About VARs
VAR models are a generalization of univariate autoregressive (AR) models, based on the notion of interdependencies between lagged values of all variables in a given model. They are commonly used to investigate the dynamic effects of shocks and perform very well in forecasting exercises. A VAR model of finite order $p$, referred to as VAR($p$) model, can be expressed as:
\begin{align} \label{equ:var}
\boldsymbol{y}_t = \boldsymbol{a}_0 + \boldsymbol{A}_1 \boldsymbol{y}_{t-1} + \dots + \boldsymbol{A}_p \boldsymbol{y}_{t-p} + \boldsymbol{\epsilon}_t, \text{ with } \boldsymbol{\epsilon}_t \sim \mathcal{N} (0, \boldsymbol{\Sigma}),
\end{align}
where $\boldsymbol{y}_t$ is an $M \times 1$ vector of endogenous variables, $\boldsymbol{a}_0$ is an $M \times 1$ intercept vector, $\boldsymbol{A}_j$ ($j = 1, \ldots, p$) are $M \times M$ coefficient matrices, and $\boldsymbol{\epsilon}_t$ is an $M \times 1$ vector of exogenous Gaussian shocks with zero mean and variance-covariance (VCOV) matrix $\boldsymbol{\Sigma}$. The number of coefficients to be estimated is $M + M^2 p$, rising quadratically with the number of included variables and linearly in the lag order. Such a dense parameterization often leads to inaccuracies with regard to out-of-sample forecasting and structural inference, especially for higher-dimensional models. This phenomenon is commonly referred to as the curse of dimensionality.

% Bayesian VARs
The Bayesian approach to estimating VAR models tackles this limitation by imposing additional structure on the model. Informative conjugate priors have been shown to be effective in mitigating the curse of dimensionality and allow for large models to be estimated \citep[see][]{doan1984, banbura2010}. They push the model parameters towards a parsimonious benchmark, reducing estimation error and improving out-of-sample prediction accuracy \citep{koop2013}. This type of shrinkage is related to frequentist regularization approaches \citep{hoerl1970, tibshirani1996}, a relation that is discussed in detail by \cite{demol2008}, among others. The flexibility of the Bayesian framework allows for the accommodation of a wide range of economic issues, naturally involves prior information, and can account for layers of uncertainty through hierarchical modeling \citep{gelman2013}.
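To put the scale of the problem into perspective, consider a quick back-of-the-envelope calculation in \proglang{R} --- a sketch using the six-variable VAR($5$) that is estimated in Section~\ref{sec:demo}:
\begin{Code}
R> M <- 6; p <- 5
R> M + M^2 * p
[1] 186
R> M * (M + 1) / 2
[1] 21
\end{Code}
That is, 186 conditional-mean coefficients plus the 21 free elements of the symmetric matrix $\boldsymbol{\Sigma}$ have to be estimated from roughly 240 quarterly observations per variable, illustrating why the additional structure imposed by informative priors is valuable.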
\subsection{Prior selection and specification} \label{subsec:prior}

% Introduce priors
Properly informing prior beliefs is critical and hence the subject of much research. In the multivariate context, flat priors, which attempt not to impose a certain belief, yield inadmissible estimators \citep{stein1956} and poor inference \citep{sims1980, banbura2010}. Other uninformative or informative priors are necessary. Early contributions \citep{litterman1980} set priors and their hyperparameters in a way that maximizes out-of-sample forecasting performance over a pre-sample. \cite{delnegro2004} choose values that maximize the marginal likelihood. \cite{banbura2010} use the in-sample fit as decision criterion and control for overfitting. Economic theory is a preferred source of prior information, but is lacking in many settings --- in particular for high-dimensional models. Acknowledging this, \cite{villani2009} reformulates the model and places priors on the steady state, which economic theory often focuses on and which is hence better understood by economists.

% Hierarchical approach
\cite{giannone2015} propose setting prior hyperparameters in a data-based fashion, i.e., by treating them as additional parameters to be estimated. In their hierarchical approach, prior hyperparameters are assigned their own hyperpriors.
% Uncertainty surrounding the choice of prior hyperparameters is acknowledged explicitly.
This can be expressed by invoking Bayes' law as:
\begin{align}
\label{equ:hm1} p(\boldsymbol{\gamma} | \boldsymbol{y}) & \propto p(\boldsymbol{y} | \boldsymbol{\gamma}) ~ p(\boldsymbol{\gamma}), \\
\label{equ:hm2} p(\boldsymbol{y} | \boldsymbol{\gamma}) & = \int p(\boldsymbol{y} | \boldsymbol{\theta}, \boldsymbol{\gamma}) p(\boldsymbol{\theta} | \boldsymbol{\gamma}) d \boldsymbol{\theta},
\end{align}
where $\boldsymbol{y} = (\boldsymbol{y}_{p+1}, \ldots, \boldsymbol{y}_{T})^\top$, the autoregressive and variance parameters of the VAR model are denoted by $\boldsymbol{\theta}$, and the set of hyperparameters by $\boldsymbol{\gamma}$. The first part of Equation~\ref{equ:hm1} is marginalized with respect to the parameters $\boldsymbol{\theta}$ in Equation~\ref{equ:hm2}. This yields a density of the data as a function of the hyperparameters $p(\boldsymbol{y} | \boldsymbol{\gamma})$, also called marginal likelihood (ML). This quantity is marginal with respect to the parameters $\boldsymbol{\theta}$, but conditional on the hyperparameters $\boldsymbol{\gamma}$. The ML can be used as a decision criterion for the hyperparameter choice; its maximization constitutes an empirical Bayes method, with a clear frequentist interpretation. In the Bayesian hierarchical approach, the ML is used to explore the full posterior hyperparameter space, explicitly acknowledging the uncertainty surrounding the hyperparameters. This yields robust inference, is theoretically grounded, and can be implemented in an efficient manner \citep{giannone2015}. The authors demonstrate the high accuracy of impulse response functions and forecasts, with the model performing competitively compared to factor models. Since then, their approach has been used extensively in applied research \citep[see e.g.,][]{baumeister2016, altavilla2018, nelson2018, altavilla2019, miranda-agrippino2020}.
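To build intuition for the empirical Bayes variant, consider a self-contained toy example in a univariate conjugate setting --- a sketch of the mechanism only, not \pkg{BVAR}'s implementation. For $y_i \sim \mathcal{N}(\theta, 1)$ with the prior $\theta \sim \mathcal{N}(0, \gamma)$, the data are marginally distributed as $\boldsymbol{y} \sim \mathcal{N}(\boldsymbol{0}, \gamma \boldsymbol{\iota} \boldsymbol{\iota}^\top + \boldsymbol{I})$, with $\boldsymbol{\iota}$ a vector of ones, so the ML is available in closed form --- e.g., via \pkg{mvtnorm} --- and can be maximized over $\gamma$:
\begin{Code}
R> set.seed(42)
R> n <- 50
R> y <- rnorm(n, mean = 0.3)
R> log_ml <- function(gamma) {
+    mvtnorm::dmvnorm(y, sigma = gamma * matrix(1, n, n) + diag(n),
+      log = TRUE)
+  }
R> optimize(log_ml, interval = c(1e-6, 10), maximum = TRUE)$maximum
\end{Code}
The hierarchical approach would instead place a hyperprior on $\gamma$ and explore its posterior, which is proportional to this ML times the hyperprior density --- in \pkg{BVAR}, this exploration is performed with a Metropolis-Hastings step.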
% Specific priors, introduce NIW
The contribution of \cite{giannone2015} focuses on conjugate prior distributions, specifically of the Normal-inverse-Wishart (NIW) family.\footnote{\cite{demol2008} note that this setting is similar to ridge penalized estimation in frequentist terms.} Conjugacy implies that the ML is available in closed form, enabling efficient computation. The NIW family includes many of the most commonly used priors \citep{koop2010, karlsson2013}, with some notable exceptions. These include, amongst others, the steady-state prior \citep{villani2009}, the Normal-Gamma prior \citep{griffin2010, huber2019}, and the Dirichlet-Laplace prior \citep{bhattacharya2015}. Many recent contributions focus on accounting for heteroskedastic error structures \citep{clark2011, kastner2014, carriero2016}. This may improve model performance, but is not possible within the conjugate setup and would complicate inference.

In the chosen NIW framework, we approach the model in Equation~\ref{equ:var} by letting $\boldsymbol{A} = \left[ \boldsymbol{a}_0, \boldsymbol{A}_1, \dots, \boldsymbol{A}_p \right]^\top$ and $\boldsymbol{\beta} = vec(\boldsymbol{A})$. Then the conjugate prior setup reads as:
\begin{align} \label{equ:niw}
\boldsymbol{\beta} | \boldsymbol{\Sigma} &\sim \mathcal{N}(\boldsymbol{b}, \boldsymbol{\Sigma} \otimes \boldsymbol{\Omega}), \\
\boldsymbol{\Sigma} &\sim \mathcal{IW} (\boldsymbol{\Psi}, \boldsymbol{d}), \nonumber
\end{align}
where $\boldsymbol{b}$, $\boldsymbol{\Omega}$, $\boldsymbol{\Psi}$ and $\boldsymbol{d}$ are functions of a lower-dimensional vector of hyperparameters $\boldsymbol{\gamma}$. In their paper, \cite{giannone2015} consider three specific priors: the so-called Minnesota (Litterman) prior, which is used as a baseline, the sum-of-coefficients prior and the single-unit-root prior \citep[see also][]{sims1998}.

% Minnesota prior
The Minnesota prior \citep{litterman1980} imposes the hypothesis that individual variables all follow random walk processes. This parsimonious specification typically performs well in forecasts of macroeconomic time series \citep{kilian2017} and is often used as a benchmark to evaluate accuracy. The prior is characterized by the following moments:
\begin{align}
\mathbb{E}[(\boldsymbol{A}_s)_{ij}|\boldsymbol{\Sigma}] &=
\begin{cases}
1 &\text{if } i=j \text{ and } s=1, \\
0 &\text{otherwise}.
\end{cases} \nonumber \\
cov \left[(\boldsymbol{A}_s)_{ij}, (\boldsymbol{A}_r)_{kl} | \boldsymbol{\Sigma} \right] &=
\begin{cases}
\lambda^2 \frac{1}{s^\alpha} \frac{\boldsymbol{\Sigma}_{ik}}{\psi_j / (d-M-1)} &\text{if } l=j \text{ and } r=s, \\
0 &\text{otherwise}.
\end{cases} \nonumber
\end{align}
The key hyperparameter $\lambda$ controls the tightness of the prior, i.e., it weighs the relative importance of prior and data. For $\lambda \to 0$ the prior outweighs any information in the data; the posterior approaches the prior. As $\lambda \to \infty$ the posterior distribution mirrors the sample information. Governing the variance decay with increasing lag order, $\alpha$ controls the degree of shrinkage for more distant observations. Finally, $\psi_j$, the $j$-th diagonal element of $\boldsymbol{\Psi}$, controls the prior's standard deviation on lags of variables other than the dependent.

% Dummy priors
Refinements of the Minnesota prior are often implemented as additional priors trying to ``reduce the importance of the deterministic component implied by VAR models estimated conditioning on the initial observations'' \citep[p.~440]{giannone2015}.
This component is defined as the expectation of future observations, given initial conditions and estimated coefficients. The sum-of-coefficients prior \citep{doan1984} is one example of such an additional prior. It imposes the notion that a no-change forecast is optimal at the beginning of a time series. The prior can be implemented by adding artificial dummy-observations on top of the data matrix. They are constructed as follows:
\begin{align}
\underset{M \times M}{\boldsymbol{y^+}} &= diag \left(\frac{\boldsymbol{\bar{y}}}{\mu}\right), \nonumber \\
\underset{M \times (1 + Mp)}{\boldsymbol{x^+}} &= [\boldsymbol{0}, \boldsymbol{y^+}, \dots, \boldsymbol{y^+}], \nonumber
\end{align}
where $\boldsymbol{\bar{y}}$ is an $M \times 1$ vector of averages over the first $p$ observations of each variable, with $p$ denoting the lag order. The key hyperparameter $\mu$ controls the variance and hence, the tightness of the prior. For $\mu \to \infty$ the prior becomes uninformative, while for $\mu \to 0$ the model is pulled towards a form with as many unit roots as variables and no cointegration.

The latter imposition motivates the single-unit-root prior \citep{sims1993, sims1998}, which allows for cointegration relationships in the data. The prior pushes the variables either towards their unconditional mean or towards the presence of at least one unit root. Its associated dummy observations are:
\begin{align}
\underset{1 \times M}{\boldsymbol{y^{++}}} &= \frac{\boldsymbol{\bar{y}}^\top}{\delta}, \nonumber \\
\underset{1 \times (1 + Mp)}{\boldsymbol{x^{++}}} &= \left[\frac{1}{\delta}, \boldsymbol{y^{++}}, \dots, \boldsymbol{y^{++}}\right], \nonumber
\end{align}
where $\boldsymbol{\bar{y}}$ is again defined as above. Similarly to before, $\delta$ is the key hyperparameter and governs the tightness of the prior.

The sum-of-coefficients and single-unit-root dummy-observation priors are commonly used in the estimation of VAR models in levels and fit the hierarchical approach to prior selection. Note, however, that the approach is applicable to all priors from the NIW family in Equation~\ref{equ:niw}, yielding a flexible and readily extensible framework.

\section[The BVAR package]{The \pkg{BVAR} package} \label{sec:bvar}

% Introduce BVAR
\pkg{BVAR} implements a hierarchical approach to prior selection \citep{giannone2015} in \proglang{R} and hands the user an easy-to-use and flexible tool for Bayesian VAR models. Its primary use cases are in the field of macroeconomic time series analysis and it is an ideal tool for exploring a range of economic phenomena. It may be consulted as a reference for similar models, with the hierarchical prior selection serving as a safeguard against unreasonable hyperparameter choices that are not supported by the data. The accessible and user-friendly implementation makes it a suitable tool for introductions to Bayesian multivariate time series modeling and for quick, versatile analysis.

% Mention some abstract features
The package is available cross-platform and on minimal installations, with no dependencies outside base \proglang{R} other than imports from \pkg{mvtnorm} \citep{genz2020}. It is implemented in native \proglang{R} for transparency and in order to lower the bar for contributions and/or adaptations. A functional approach to the package structure facilitates optimization of computationally intensive steps, including ports to e.g., \proglang{C++}, and ensures extensibility.
The complete documentation, helper functions to access the multitude of settings, and use of established methods for analysis make the package easy to operate, without sacrificing flexibility.

% Mention usage features
\pkg{BVAR} features extensive customization options with regard to the elicited priors, their hyperparameters, and the hierarchical treatment of them. The Minnesota prior is used as a baseline; all of its hyperparameters are adjustable and can be treated hierarchically. Users can easily include the sum-of-coefficients and single-unit-root priors of \cite{sims1998}. The flexible implementation also allows users to construct custom dummy-observation priors. Further options are devoted to the MCMC method and the Metropolis-Hastings (MH) algorithm, which is used to explore the posterior hyperparameter space. The numbers of burnt and saved draws are adjustable; thinning may be employed to reduce memory requirements and serial correlation. Proper exploration of the posterior is facilitated by options to manually scale individual proposals for the MH step, or to enable automatic scaling until a target acceptance rate is achieved. The customization options can be harnessed for flexible analysis with a number of established and specialized methods.

% Forecasts
A major function and common application of VAR models is prediction. VAR-based forecasts have proven to be superior to many other methods \citep{banbura2010, koop2013}. They do not rely on imposing particular restrictions on model parameters, as is the case for structural models. \pkg{BVAR} can be used to conduct both classic unconditional as well as conditional forecasts. Unconditional forecasts are implemented to mirror base \proglang{R} for straightforward use. They rival those obtained from factor models in accuracy \citep{giannone2015} and can be used for a variety of analyses. Conditional forecasts allow for elaborate scenario analyses, where the future path of one or more variables is assumed to be known. They are a handy tool for analyzing possible realizations of policy-relevant variables. The algorithmic implementation of conditional forecasts follows \cite{waggoner1999} and is closely linked to structural analysis.

% IRF
Impulse response functions (IRF) are a central tool for structural analysis. They provide insights into the behavior of economic systems and are another cornerstone of inference with VAR models. IRF serve as a representation of shocks hitting the economic system and are used to analyze the reactions of individual variables. The exact propagation of these shocks is of great interest, but meaningful interpretation relies on proper identification. \pkg{BVAR} features a framework for identification schemes, with some of the most popular schemes readily available --- namely short-term zero restrictions, sign restrictions, as well as a combination of zero and sign restrictions. The first is also known as recursive identification and is achieved via Cholesky decomposition of the VCOV matrix $\boldsymbol{\Sigma}$ \citep[see][Chapter 8]{kilian2017}. This approach is computationally cheap and achieves exact identification without the need for detailed assumptions about variable behavior. Only the contemporaneous reactions of certain variables are limited, making the order of variables pivotal. Sign restrictions \citep[see][Chapter 13]{kilian2017} are another popular means of identification that is implemented following the approach of \cite{rubio-ramirez2010}.
This scheme requires some presumptions about the behavior of variables following a certain shock. As the dimension of the model increases, theoretically grounding such presumptions becomes more and more challenging. An extension of this identification scheme, put forth by \cite{arias2018}, allows for simultaneously imposing sign and zero restrictions, providing even more flexibility.
% Additionally, identification via sign restrictions comes at the cost of increased uncertainty and a loss of precision for the resulting IRF.

Forecast error variance decompositions (FEVD) are another related tool for structural analysis. They are used to investigate which variables drive the paths of others after a given shock. FEVD can easily be computed in \pkg{BVAR} and allow for a more detailed structural analysis of the processes determining the behavior of an economic system.

% FRED datasets
\pkg{BVAR} packages the popular FRED-MD and FRED-QD databases \citep{mccracken2016,mccracken2020}. They constitute two of the largest macroeconomic databases, featuring more than 200 macroeconomic indicators on a monthly and quarterly basis, respectively. The databases describe the US economy, starting in 1959, and are updated regularly. They are distributed in \pkg{BVAR} under a permissive modified Open Data Commons Attribution License (ODC-BY 1.0). Together with helper functions to aid with transformations, they allow users to start using the package hassle-free. FRED-MD and FRED-QD lend themselves to the study of a wide range of economic phenomena and are regularly used in benchmarking exercises for newly developed models and methods \citep[see inter alia][]{carriero2018, koop2019, huber2020}.

% Conclude
To sum up, \pkg{BVAR} makes estimation of and inference in Bayesian VAR models accessible and user-friendly. Extensive customization options are available, with sensible default settings allowing for a step-by-step adoption. This is further facilitated by lucid helper functions and comprehensive documentation. Analysis of estimated VAR models is readily accessible --- functions for summarizing and plotting model parameters, forecasts, IRF, traces, densities, and residuals are available. Use of established procedures and standard methods, including \code{plot()}, \code{predict()}, \code{coef()}, and \code{summary()}, sets a low entry barrier for \proglang{R} users. Final and intermediate outputs are provided in an idiomatic format and feature \code{print()} methods for quick access and a transparent research process. Existing frameworks may be used for further analysis --- an interface to \pkg{coda} \citep{plummer2006} for checking outputs, analysis, and diagnostics is provided. The \pkg{BVARverse} \citep{vashold2020} companion package allows integration into a workflow oriented towards the concept of tidy data \citep{wickham2014} and facilitates flexible plotting with \pkg{ggplot2} \citep{wickham2016}. The available FRED-MD and FRED-QD datasets allow hassle-free exploration of macroeconomic research questions. These features make \pkg{BVAR} an ideal tool for macroeconomic analysis.

%% -- Illustrations ------------------------------------------------------------
%% - Virtually all JSS manuscripts list source code along with the generated
%%   output. The style files provide dedicated environments for this.
%% - In R, the environments {Sinput} and {Soutput} - as produced by Sweave()
%%   or knitr using the render_sweave() hook - are used (without the need to
%%   load Sweave.sty).
%% - Equivalently, {CodeInput} and {CodeOutput} can be used.
%% - The code input should use "the usual" command prompt in the respective
%%   software system.
%% - For R code, the prompt "R> " should be used with "+ " as the
%%   continuation prompt.
%% - Comments within the code chunks should be avoided - these should be made
%%   within the regular LaTeX text.

% \newpage % sed_delete - hard-formatted
\section{An applied example} \label{sec:demo}

% Introduce the demonstration
In this section, we demonstrate \pkg{BVAR} via an applied example. We use a subset of the included data and go through a typical workflow of (1) preparing the data, (2) configuring priors and other aspects of the model, (3) estimating the model, and finally (4) analyzing outputs, including IRF and forecasts. Further possible applications and examples are available in the Appendix. We start by setting a seed for reproducibility and loading \pkg{BVAR}.
<>=
set.seed(42)
library("BVAR")
@

\subsection{Data preparation} \label{subsec:data}

% Load and transform dataentry
The main function \code{bvar()} expects input data to be coercible to a rectangular numeric matrix without any missing values. For this example, we use six variables from the included FRED-QD dataset \citep{mccracken2020}, akin to the medium VAR considered by \cite{giannone2015}. The variables are real gross domestic product (GDP), real personal consumption expenditures, real gross private domestic investment (all three in billions of 2012 dollars), as well as the number of total hours worked in the non-farm business sector, the GDP deflator index as a means to measure price inflation, and the effective federal funds rate in percent per year. The currently covered time period ranges from Q1 1959 to Q3 2019.

We follow \cite{giannone2015} in transforming all variables except the federal funds rate to log-levels, in order to also demonstrate the aforementioned dummy priors. Transformation can be performed manually or with the helper function \code{fred_transform()}. The function supports the transformations listed by \cite{mccracken2016,mccracken2020}, which can be accessed via their transformation codes, as well as automatic transformation. See Appendix~\ref{app:data} for a demonstration of this and related functionalities. For our example, we specify a log-transformation for the corresponding variables with code \code{4} and no transformation for the federal funds rate with code \code{1}. Figure~\ref{fig:timeseries} provides an overview of the transformed time series.
<>=
x <- fred_qd[1:243, c("GDPC1", "PCECC96", "GPDIC1",
  "HOANBS", "GDPCTPI", "FEDFUNDS")]
x <- fred_transform(x, codes = c(4, 4, 4, 4, 4, 1))
@
<>=
op <- par(mfrow = c(2, 3), mar = c(3, 3, 1, 0.5), mgp = c(2, 0.6, 0))
plot(as.Date(rownames(x)), x[ , "GDPC1"], type = "l",
  xlab = "Time", ylab = "Gross domestic product")
plot(as.Date(rownames(x)), x[ , "PCECC96"], type = "l",
  xlab = "Time", ylab = "Consumption expenditure")
plot(as.Date(rownames(x)), x[ , "GPDIC1"], type = "l",
  xlab = "Time", ylab = "Private investment")
plot(as.Date(rownames(x)), x[ , "HOANBS"], type = "l",
  xlab = "Time", ylab = "Total hours worked")
plot(as.Date(rownames(x)), x[ , "GDPCTPI"], type = "l",
  xlab = "Time", ylab = "GDP deflator")
plot(as.Date(rownames(x)), x[ , "FEDFUNDS"], type = "l",
  xlab = "Time", ylab = "Federal funds rate")
par(op)
@

\begin{figure}[ht]
\centering
\includegraphics[width=0.9\textwidth]{fig-timeseries.pdf}
\caption{Transformed time series under consideration.}
\label{fig:timeseries}
\end{figure}

% \newpage % sed_add - hard-formatted
\subsection{Prior setup and further configuration} \label{subsec:setup}

% Introduce bv_
After preparing the data, we are ready to specify priors and configure our model. Functions related to estimation setup and configuration share the prefix \code{bv_}. They are grouped in this way to make them easily discernible and their documentation accessible, facilitating their use. This contrasts with methods and functions for analysis, which stick closely to idiomatic \proglang{R}.

% Priors, start with Minnesota
Priors are set up using \code{bv_priors()}, which holds arguments for the Minnesota and dummy-observation priors as well as the hierarchical treatment of their hyperparameters. We start by adjusting the Minnesota prior using \code{bv_minnesota()}. The prior hyperparameter $\lambda$ has a Gamma hyperprior and is handed upper and lower bounds for its Gaussian proposal distribution in the MH step. For this example, we do not treat $\alpha$ hierarchically, meaning it can be fixed via the \code{mode} argument. The prior variance on the constant term of the model (\code{var}) is dealt a large value, for a diffuse prior. We leave $\boldsymbol{\Psi}$ to be set automatically --- i.e., to the square root of the innovations variance, after fitting AR($p$) models to each of the variables.
<>=
mn <- bv_minnesota(
  lambda = bv_lambda(mode = 0.2, sd = 0.4, min = 0.0001, max = 5),
  alpha = bv_alpha(mode = 2), var = 1e07)
@

% Dummy priors
We also include the sum-of-coefficients and single-unit-root priors --- two pre-constructed dummy-observation priors. The hyperpriors of their key hyperparameters are assigned Gamma distributions, with specification working in the same way as for $\lambda$. Custom dummy-observation priors can be set up similarly via \code{bv_dummy()} and require an additional function to construct the observations (see Appendix~\ref{app:dummy} for a demonstration).
<>=
soc <- bv_soc(mode = 1, sd = 1, min = 1e-04, max = 50)
sur <- bv_sur(mode = 1, sd = 1, min = 1e-04, max = 50)
@

% Wrap up priors
Once the priors are defined, we provide them to \code{bv_priors()}. The dummy-observation priors are captured by the ellipsis argument (\code{...}) and need to be named. Via \code{hyper} we choose which hyperparameters should be treated hierarchically. Its default setting (\code{"auto"}) includes $\lambda$ and the key hyperparameters of all provided dummy-observation priors. In our case, this is equivalent to providing the character vector \code{c("lambda", "soc", "sur")}.
Hyperparameters that are not treated hierarchically, e.g., $\alpha$, are treated as fixed and set equal to their \code{mode}.
<>=
priors <- bv_priors(hyper = "auto", mn = mn, soc = soc, sur = sur)
@

% Metropolis-Hastings
As a final step before estimation, we adjust the MH algorithm via \code{bv_metropolis()}. The function allows fine-tuning the exploration of the posterior space --- a vital prerequisite for proper inference. The primary argument is \code{scale_hess}, a scalar or vector. It allows scaling the inverse Hessian, which is used as VCOV matrix of the Gaussian proposal distribution for the hierarchically treated hyperparameters. This affords us the flexibility of individually tweaking the posterior exploration of hyperparameters. Scaling can be complemented by setting \code{adjust_acc = TRUE}, which enables automatic scale adjustment. This happens during an initial share of the burn-in period, adaptable via \code{adjust_burn}. Automatic adjustment is performed iteratively by \code{acc_change} percent, until an acceptance rate between \code{acc_lower} and \code{acc_upper} is reached.
<>=
mh <- bv_metropolis(scale_hess = c(0.05, 0.0001, 0.0001),
  adjust_acc = TRUE, acc_lower = 0.25, acc_upper = 0.45)
@

% Wrap up setup
After configuring the model's priors and the MH step, we are ready for estimation. Further available configuration options for the MCMC method, IRF, FEVD, and forecasts are described in the following paragraphs. On the one hand, the available settings permit users to tailor models and specific components to their individual needs. This enables them to address an extensive set of research questions. On the other hand, much simpler and quicker utilization is possible --- the default settings provide a suitable point of departure for many applications and the hierarchical approach brings additional parameter flexibility. This enables users to (1) focus on critical parts of their model and (2) use \pkg{BVAR} with ease, facilitating gradual adoption and fine-tuning of models.

\subsection{Estimation of the model}

% Present estimation
Models are estimated using the core function \code{bvar()}. At the bare minimum, we need to provide our prepared data and a lag order $p$ as arguments. Additionally, we pass on our customization objects from above to their respective arguments. We also define the total number of iterations with \code{n_draw}, the number of initial iterations to discard with \code{n_burn}, and a denominator for the fraction of draws to store via \code{n_thin}. We increase the number of total and burnt iterations, while retaining all draws. Note that arguments for computing IRF, FEVD, and forecasts are also available and work similarly to the ex-post calculation that is demonstrated below. When estimating the model, \code{verbose = TRUE} prompts printing of intermediate results and enables a progress bar during the MCMC step.
<>=
run <- bvar(x, lags = 5, n_draw = 15000, n_burn = 5000, n_thin = 1,
  priors = priors, mh = mh, verbose = TRUE)
@
% Hardcoded due to progress bar - care for proper values
% 2020-04-28
\begin{Soutput}
Optimisation concluded.
Posterior marginal likelihood: 3637.405
Hyperparameters: lambda = 1.51378; soc = 0.12618; sur = 0.47674
|==================================================| 100%
Finished MCMC after 16.89 secs.
\end{Soutput}

% Describe outputs
The return value is an object of class `\code{bvar}' --- a named list with several outputs.
These include the parameters of interest, i.e., posterior draws of the VAR coefficients, the VCOV matrix, and the hierarchically treated hyperparameters. Other content includes the values of the marginal likelihood for each draw, starting values of the prior hyperparameters obtained from \code{optim()}, the prior settings provided as well as ones set automatically, and the original call to the \code{bvar()} function. A variety of meta information is included as well --- e.g., the number of accepted draws, variable names, and time spent calculating. In addition, there are slots for the outputs of IRF, FEVD, and forecasts. They are filled automatically if calculated, or can be appended ex-post via replacement functions. Outputs can be accessed manually or via a multitude of functions and methods for analysis.

\subsection{Analyzing outputs}\label{subsec:assess}

% Introduce methods and generics
\pkg{BVAR} provides a range of standard methods for objects of type `\code{bvar}' and derivatives, which facilitate cursory assessments and detailed analysis. These include \code{print()}, \code{plot()}, and \code{summary()} methods, as well as a \code{predict()} method and an \code{irf()} generic. The \code{print()} method provides some meta information as well as details on hierarchically treated prior hyperparameters and their starting values obtained from optimization via \code{optim()}. The \code{summary()} method mimics its counterpart in \pkg{vars}, including information on the VAR model's log-likelihood, coefficients, and the VCOV matrix. These are also available via the methods \code{logLik()}, \code{coef()}, and \code{vcov()}, respectively. Other established methods, such as \code{fitted()}, \code{density()}, and \code{residuals()}, are also provided. They operate on all posterior draws and include clear and concise \code{print()} methods.

% Demonstrate some
For our example, we access an overview of our estimation using \code{print()}. Then we use \code{plot()} to assess convergence of the MCMC algorithm, which is essential for reliable inference. By default, the method provides trace and density plots of the ML and the hierarchically treated hyperparameters (see Figure~\ref{fig:trace_density}). Burnt draws are not included and parameter boundaries are plotted as dashed gray lines. The plot can be subset to specific hyperparameters or autoregressive coefficients via the \code{vars} argument (see Figure~\ref{fig:betas}). The arguments \code{vars_response} and \code{vars_impulse} provide a concise alternative way of retrieving autoregressive coefficients. We can also use the \code{type} argument to choose a specific type of plot.
<>=
print(run)
@
% Hardcoded to match outputs above
% 2020-04-28
\begin{Soutput}
Bayesian VAR consisting of 238 observations, 6 variables and 5 lags.
Time spent calculating: 16.89 secs
Hyperparameters: lambda, soc, sur
Hyperparameter values after optimisation: 1.51378, 0.12618, 0.47674
Iterations (burnt / thinning): 15000 (5000 / 1)
Accepted draws (rate): 3933 (0.393)
\end{Soutput}
<>=
plot(run)
plot(run, type = "dens",
  vars_response = "GDPC1", vars_impulse = "GDPC1-lag1")
@
<>=
plot(run)
@

\begin{figure}[!ht]
\centering
\includegraphics[width=0.9\textwidth]{fig-trace_density.png}
\caption{Trace and density plots of all hierarchically treated hyperparameters and the ML.}
\label{fig:trace_density}
\end{figure}

<>=
plot(run, type = "dens",
  vars_response = "GDPC1", vars_impulse = "GDPC1-lag1")
@

\begin{figure}[!ht]
\centering
\includegraphics[width=0.51\textwidth]{fig-betas.pdf}
\caption{Density plot for the autoregressive coefficient corresponding to the first lag of GDP in the GDP equation.}
\label{fig:betas}
\end{figure}

% Demonstrate some more
Visual inspection of trace and density plots suggests convergence of the key hyperparameters. The chain appears to be exploring the posterior rather well; no glaring outliers are recognizable. However, as a supplement to this examination, one might want to employ additional convergence diagnostics. The \pkg{coda} package provides, among many other useful functionalities, several such statistics that can be accessed using \pkg{BVAR}'s \code{as.mcmc()} method. An illustration of the interface, the use of diagnostics, and parallel execution of \code{bvar()} is provided in Appendix~\ref{app:conv}.

Given proper convergence, we may be interested in fitted and residual values (see Figure~\ref{fig:residuals}). We set \code{type = "mean"} to use the mean of posterior draws. Alternatively, credible bands can be computed via the \code{conf_bands} argument.
<>=
fitted(run, type = "mean")
@
<>=
plot(residuals(run, type = "mean"), vars = c("GDPC1", "PCECC96"))
@

\begin{figure}[!ht]
\centering
\includegraphics[width=0.9\textwidth]{fig-residuals.pdf}
\caption{Residual plots of GDP and private consumption expenditure.}
\label{fig:residuals}
\end{figure}

% IRF
Structural analysis with \pkg{BVAR} works in a straightforward fashion. Impulse response functions are handled in a specific object, with the associated generic function \code{irf()}. The function is used for computing, accessing, and storing IRF in the respective slot of a `\code{bvar}' object, as well as for updating credible bands. Forecast error variance decompositions rely on IRF and are nested in the respective object. They can be accessed directly with the generic function \code{fevd()}. Configuration options for IRF and FEVD are available via the ellipsis argument of \code{irf()}, or the helper function \code{bv_irf()}. They include the horizon to be considered, whether or not FEVD should be computed, and further settings regarding identification. By default, the shocks under scrutiny are identified via short-term zero restrictions. Identification can also be achieved in other ways (see Appendix~\ref{app:signs} for an example of imposing sign restrictions) or skipped entirely. IRF objects feature methods for plotting, printing, and summarizing. The \code{plot()} method has options to subset the plots to specific impulses and/or responses via name or position of the variable. Credible bands are visualized as lines, or as shaded areas when the \code{area} argument is toggled. In the example below, we customize our IRF using \code{bv_irf()}, then compute and store them with \code{irf()}.
We plot the IRF of certain shocks and variables by specifying them with \code{vars_impulse} and \code{vars_response} in Figure~\ref{fig:irf_cholesky}.
<>=
opt_irf <- bv_irf(horizon = 16, identification = TRUE)
irf(run) <- irf(run, opt_irf, conf_bands = c(0.05, 0.16))
@
<>=
plot(irf(run), area = TRUE,
  vars_impulse = c("GDPC1", "FEDFUNDS"), vars_response = c(1:2, 6))
@

\begin{figure}[ht]
\centering
\includegraphics[width=1\textwidth]{fig-irf_cholesky.pdf}
\caption{Impulse responses of GDP, private consumption expenditure and the federal funds rate to an aggregate demand shock (left panels) and a monetary policy shock (right panels). Shaded areas refer to the 90\% and the 68\% credible sets.}
\label{fig:irf_cholesky}
\end{figure}

% Forecasts
Forecasting with \pkg{BVAR} is facilitated by a \code{predict()} method and a specific object for forecasts. The method works similarly to \code{irf()}, with functionality to compute, access, and store outputs in the respective slot of a `\code{bvar}' object and to update credible bands. Settings can also be accessed via the ellipsis argument or the helper function \code{bv_fcast()}. For unconditional forecasts, only the forecasting horizon is required. In order to conduct scenario analyses based on conditional forecasts, further settings have to be passed on (see Appendix~\ref{app:cond} for a demonstration). Forecast objects feature methods for plotting, printing, and summarizing. The \code{vars} argument of the \code{plot()} method can be used to subset plots to certain variables. The visualization can include a number of realized values before the forecast, which is set with the argument \code{t_back} (see Figure~\ref{fig:predict_unconditional}).
<>=
predict(run) <- predict(run, horizon = 16, conf_bands = c(0.05, 0.16))
@
<>=
plot(predict(run), area = TRUE, t_back = 32,
  vars = c("GDPC1", "GDPCTPI", "FEDFUNDS"))
@

\begin{figure}[ht]
\centering
\includegraphics[width=0.9\textwidth]{fig-predict_unconditional.pdf}
\caption{Unconditional forecasts for GDP, the GDP deflator and the federal funds rate. Shaded areas refer to the 90\% and the 68\% credible sets.}
\label{fig:predict_unconditional}
\end{figure}

% Conclude demonstration
This concludes the demonstration of setup, estimation, and analysis with \pkg{BVAR}. A comprehensive description of the package and available functionalities is provided in the package documentation. Some more advanced and specific features, including identification via sign restrictions, conditional forecasts, and the interface to \pkg{coda}, are demonstrated in the Appendix.

%% -- Summary/conclusions/discussion -------------------------------------------

\clearpage
\section{Conclusion} \label{sec:concl}

% Summarise & conclude
This article introduced \pkg{BVAR}, an \proglang{R}~package that implements Bayesian vector autoregressions with hierarchical prior selection. It offers a flexible yet structured and transparent tool for multivariate time series analysis and can be used to assess a wide range of research questions. By means of an applied example, we illustrated the usage of the package and explained its implementation and configuration. The hierarchical prior selection mitigates subjective choices, improving flexibility and counteracting a common critique of Bayesian methods. Accessible helper functions for customization as well as comprehensive methods and generic functions for analysis top off an extensive set of features.
The functional style and idiomatic implementation in \proglang{R} make the package easy to use, extensible, and transparent.

% Outlook
\pkg{BVAR} bridges a gap as an accessible all-purpose tool for Bayesian VAR models, but leaves plenty of potential for extensions and novel libraries. Smooth integration into existing workflows and software ecosystems offers considerable potential, as demonstrated by the interface to \pkg{coda}. The \pkg{BVARverse} package pursues closer adherence to tidy workflows, facilitating the use of established tools for analysis and visualization. Future development could target a range of useful packages, such as \pkg{tidybayes} \citep{kay2020}, or aim at harmonizing VAR functionalities. A major area for future work is support for a broader spectrum of prior families. This would help enrich analysis and could incorporate, e.g., heteroskedastic error structures in various forms. A powerful library implementing these features would be a valuable asset and complement \pkg{BVAR} and similar \proglang{R} packages.

%% -- Optional special unnumbered sections -------------------------------------

% \clearpage
\section*{Computational details}

The results in this paper were obtained using \proglang{R}~4.0.2 with \pkg{BVAR}~1.0.1, \pkg{mvtnorm}~{1.1-1}, and \pkg{coda}~{0.19-3}. The machine used to generate the results is running Windows 10 with an i5-4670k and 16 GB RAM. The scripts were also tested on a machine running Debian Sid with an i7-7500U and 16 GB RAM and a machine running Ubuntu 20.04 LTS with a Ryzen-3500U and 16 GB RAM. \proglang{R} and all packages used are available from the Comprehensive \proglang{R} Archive Network (CRAN) at \url{https://CRAN.R-project.org/}.

\section*{Acknowledgments}

We would like to thank Victor Maus and Florian Huber for helpful comments and encouragement, the staff at the St.~Louis Federal Reserve for their work on FRED-QD and FRED-MD, and the JSS editorial team for improvement suggestions. Our thanks also go out to Gregor Kastner, Casper Engelen, Daran Demirol, Maximilian B\"ock, Michael Pfarrhofer, Niko Hauzenberger, Sebastian Luckeneder, and two anonymous referees. % Not ordered

%% -- Bibliography --------------------------------------------------------------
%% - References need to be provided in a .bib BibTeX database.
%% - All references should be made with \cite, \citet, \citep, \citealp etc.
%%   (and never hard-coded). See the FAQ for details.
%% - JSS-specific markup (\proglang, \pkg, \code) should be used in the .bib.
%% - Titles in the .bib should be in title case.
%% - DOIs should be included where available.

\clearpage
\bibliography{refs}

%% -- Appendix (if any) ----------------------------------------------------------
%% - After the bibliography with page break.
%% - With proper section titles and _not_ just "Appendix".

\newpage
\begin{appendix}

\section{Data transformation and stationarity}\label{app:data}

Econometric analysis often relies on data coming in a specific format, frequently requiring transformation. The helper function \code{fred_transform()} facilitates this step. It can be used to apply common transformations, subset data to a rectangular format without missing values, and automatically transform the included FRED-QD and FRED-MD datasets. Both datasets are packaged in their raw format, but \cite{mccracken2016,mccracken2020} provide suggested transformations in their online appendices (see \url{https://research.stlouisfed.org/econ/mccracken/fred-databases/}).
These transformations can also be looked up manually with \code{fred_code()}. Below, we demonstrate this using a prototypical monetary VAR model --- covering GDP, the GDP deflator and the federal funds rate. For this example, we are looking to transform the individual time series to be stationary.

To automatically apply the suggested transformation of \cite{mccracken2016}, we simply specify the \code{type} --- either \code{"fred_qd"} or \code{"fred_md"} --- and call \code{fred_transform()} with our data. In a second step, we deviate from the suggestions and directly provide transformation codes to the \code{codes} argument. This way we can apply the transformations to any suitable dataset. The function displays available codes when called without arguments. For this example, we choose \code{5} for log-differences and \code{1} for no transformation. We also set \code{lag = 4} for yearly differences of our quarterly data. Figure~\ref{fig:app_timeseries} shows the transformed time series, which will be used to illustrate additional features below.
<>=
y <- fred_qd[1:243, c("GDPC1", "GDPCTPI", "FEDFUNDS")]
z <- fred_transform(y, type = "fred_qd")
y <- fred_transform(y, codes = c(5, 5, 1), lag = 4)
@
<>=
op <- par(mfrow = c(1, 3), mar = c(3, 3, 1, 0.5), mgp = c(2, 0.6, 0))
plot(as.Date(rownames(y)), y[ , "GDPC1"], type = "l",
  xlab = "Time", ylab = "GDP growth")
plot(as.Date(rownames(y)), y[ , "GDPCTPI"], type = "l",
  xlab = "Time", ylab = "Inflation")
plot(as.Date(rownames(y)), y[ , "FEDFUNDS"], type = "l",
  xlab = "Time", ylab = "Federal funds rate")
par(op)
@

\begin{figure}[ht]
\centering
\includegraphics[width=0.9\textwidth]{fig-app_timeseries.pdf}
\caption{Transformed time series under consideration for a prototypical VAR model.}
\label{fig:app_timeseries}
\end{figure}

When estimating a VAR model in differences, implications for the priors need to be taken into account. The sum-of-coefficients prior and the single-unit-root prior are no longer applicable. For this example, we only impose the Minnesota prior, which also needs to be adjusted slightly to still carry the notion that variables follow a random walk. This is done by setting the prior mean \code{b = 0} in \code{bv_mn()}, or its alias \code{bv_minnesota()}. The argument may also be provided in a vector or a matrix format, in case the variables differ in their order of integration.
<>=
priors_app <- bv_priors(mn = bv_mn(b = 0))
run_app <- bvar(y, lags = 5, n_draw = 15000, n_burn = 5000,
  priors = priors_app, mh = bv_mh(scale_hess = 0.5, adjust_acc = TRUE),
  verbose = FALSE)
@

\section{Construction of custom dummy priors} \label{app:dummy}

Here, we demonstrate the construction of custom dummy priors using \code{bv_dummy()}. As an example, the sum-of-coefficients prior is reconstructed manually. A custom prior requires a function to construct artificial observations. This function should take at least three arguments --- the data as a numeric matrix, an integer with the number of lags, and the value of the prior hyperparameter. The return value is a list, containing two numeric matrices, \code{Y} and \code{X}, with artificial observations to stack on top of the data matrix and the lagged data matrix. For the sum-of-coefficients prior, we follow the procedure outlined in Section~\ref{subsec:prior}.
<<app_dummy>>=
add_soc <- function(Y, lags, par) {
  # Diagonal of the average over the first `lags` observations, scaled by `par`
  soc <- if(lags == 1) {diag(Y[1, ]) / par} else {
    diag(colMeans(Y[1:lags, ])) / par
  }
  # Zero column for the intercept, dummies repeated for each lag
  X_soc <- cbind(rep(0, ncol(Y)),
    matrix(rep(soc, lags), nrow = ncol(Y)))
  return(list("Y" = soc, "X" = X_soc))
}
@

This function is then passed to \code{bv_dummy()} via the argument \code{fun}. The remaining arguments work in the same way as for other prior constructors (see Section~\ref{subsec:setup}); they determine the hyperprior distribution and the boundaries of the proposal distribution. Again, if not treated hierarchically, the prior hyperparameter is set to its mode. The output of \code{bv_dummy()} is then passed to the ellipsis argument of \code{bv_priors()} and needs to be named. Further steps do not differ from the standard procedure --- posterior draws are stored and can be analyzed in the same way as for the Minnesota prior.

<<app_soc>>=
soc <- bv_dummy(mode = 1, sd = 1, min = 0.0001, max = 50, fun = add_soc)
priors_soc <- bv_priors(soc = soc)
@

\section{Convergence assessment and parallelization} \label{app:conv}

Bayesian simulation relies heavily on the convergence of samplers, particularly so for hierarchical models. As demonstrated in Section~\ref{subsec:assess}, \pkg{BVAR} includes functionality to assess convergence visually. Many more tools are available through the interface to \pkg{coda}, which specializes in output assessment for Markov chain Monte Carlo (MCMC) simulations. To access the interface, we call the \code{as.mcmc()} method on the VAR model from Appendix~\ref{app:data}. The method works similarly to \code{plot()}, allowing users to subset hyperparameters or VAR coefficients with \code{vars}, \code{vars_response}, and \code{vars_impulse}. The selected draws are converted to a `\code{mcmc}' object, which supports thorough analysis of convergence behavior. This includes the diagnostic statistic of \cite{geweke1992}, which can be calculated with \code{geweke.diag()}.

<<app_geweke>>=
library("coda")
run_mcmc <- as.mcmc(run_app, vars = "lambda")
geweke.diag(run_mcmc)
@

The value of the diagnostic statistic constitutes a standard $z$~score and indicates proper within-chain convergence of the hyperparameter $\lambda$. Still, one may also be interested in the behavior across chains. A suitable diagnostic to assess between-chain convergence was proposed by \cite{gelman1992} and is implemented in \pkg{coda} as \code{gelman.diag()}. The need for multiple chains makes parallelization attractive, which is supported by the wrapper function \code{par_bvar()}. The wrapper uses \code{parLapply()} from the \pkg{parallel} package \citep{R} to run instances of \code{bvar()} in parallel. The output is a `\code{bvar_chains}' object, i.e., a list of `\code{bvar}' objects that is supported by convergence-related methods. With the \code{plot()} method, we can visualize the separate runs for different hyperparameters or the ML, which are specified via the \code{vars} argument (see Figure~\ref{fig:app_chains}). We can also transform the output for use with \pkg{coda} by calling \code{as.mcmc()}, which now yields a `\code{mcmc_list}' object. This object is passed on to \code{gelman.diag()} to compute a diagnostic statistic for between-chain convergence.
% CRAN limits to two cores
<<app_parallel>>=
library("parallel")
n_cores <- 2
cl <- makeCluster(n_cores)
runs <- par_bvar(cl = cl, data = y, lags = 5,
  n_draw = 15000, n_burn = 5000, n_thin = 1, priors = priors_app,
  mh = bv_mh(scale_hess = 0.5, adjust_acc = TRUE))
stopCluster(cl)
runs_mcmc <- as.mcmc(runs, vars = "lambda")
gelman.diag(runs_mcmc, autoburnin = FALSE)
@

<<app_chains, fig=TRUE, include=FALSE, png=TRUE, pdf=FALSE>>=
plot(runs, type = "full", vars = "lambda")
@

\begin{figure}[ht]
\centering
\includegraphics[width=0.9\linewidth]{fig-app_chains.png}
\caption{Plots of $\lambda$, the key hyperparameter of the Minnesota prior, for separate runs.}
\label{fig:app_chains}
\end{figure}

\section{Identification via sign restrictions} \label{app:signs}

Identification is required for meaningful interpretation of impulse response functions. \pkg{BVAR} implements various schemes to achieve identification in a flexible, yet easy-to-use manner. Sign restrictions, one such scheme, are readily available and demonstrated here. Identification via sign restrictions relies on forming expectations about the directions of responses to certain shocks \citep{rubio-ramirez2010}. These expectations are usually derived from economic theory --- a process that can become cumbersome for high-dimensional models.

Sign identification of shocks in \pkg{BVAR} is set up in \code{bv_irf()}, which can be accessed directly or through the ellipsis argument of \code{irf()}. We set \code{identification = TRUE} and provide a matrix $\mathbf{SR}$ with sign restrictions to the \code{sign_restr} argument. $\mathbf{SR}$ is constructed by setting each element $SR_{ij}$ equal to $1$ ($-1$) if the contemporaneous response of variable $i$ to a shock from variable $j$ is expected to be an increase (decrease). By setting elements equal to $0$, we can additionally impose zero restrictions. For an agnostic view, we set an element to \code{NA}, imposing no restriction on the respective response. Note that the shocks need to be uniquely identifiable \citep{kilian2017}. After running \code{bv_irf()}, we can print the chosen sign restrictions with the object's \code{print()} method. To calculate the impulse responses, we call \code{irf()} as usual, providing the settings to the ellipsis argument. They are then computed based on suitable shocks, which are drawn following the algorithm of \cite{rubio-ramirez2010}, or that of \cite{arias2018} when zero restrictions are imposed. Figure~\ref{fig:app_irf_signs} provides a visualization of the restricted impulse responses.

<<app_signs, results=hide>>=
sr <- matrix(c(1, 1, 1, -1, 1, NA, -1, -1, 1), ncol = 3)
opt_signs <- bv_irf(horizon = 16, fevd = TRUE,
  identification = TRUE, sign_restr = sr)
print(opt_signs)
@

% Hardcoded due to tabs breaking
\begin{Soutput}
Object with settings for computing impulse responses.
Horizon: 16
Identification: Sign restrictions
Chosen restrictions:
                        Shock to
                  Var1    Var2    Var3
Response of Var1     +       -       -
            Var2     +       +       -
            Var3     +      NA       +
FEVD: TRUE
\end{Soutput}

<<app_irf>>=
irf(run_app) <- irf(run_app, opt_signs)
@

<<app_irf_signs, fig=TRUE, include=FALSE>>=
plot(irf(run_app), vars_impulse = c(1, 3))
@

\begin{figure}[ht!]
\centering
\includegraphics[width=0.9\linewidth]{fig-app_irf_signs.pdf}
\caption{Set of impulse responses from a prototypical monetary VAR model using sign restrictions to achieve identification. Grey lines refer to the 68\% credible set.}
\label{fig:app_irf_signs}
\end{figure}

As can be discerned, the shocks identified via sign restrictions allow for contemporaneous relations between all variables --- in contrast to those obtained via recursive identification. The instantaneous impacts are in accordance with the imposed restrictions.
Note that the credible bands surrounding the median impulse responses are somewhat inflated. This stems partly from the additional uncertainty introduced by drawing random orthogonal matrices in the algorithm.

\section{Conditional forecasts}\label{app:cond}

Conditional forecasts are a useful tool for scenario analysis, where the future paths of one or more variables are known, or assumed to be known. Here, we demonstrate how to conduct such a scenario analysis using \pkg{BVAR} by means of a concise example. Conditional forecasts rely on fixing the future paths of a number of variables, in order to gain insights into the development of the remaining unconstrained ones \citep{waggoner1999}. This allows researchers to compare different trajectories of policy-relevant quantities and to analyze their impacts.

In \pkg{BVAR}, we configure conditional forecasts with \code{bv_fcast()}, which can be accessed directly or via the ellipsis argument of \code{predict()}. The assumed future paths are provided to the \code{cond_path} argument as a numeric vector or matrix; the names or positions of the constrained variables are passed to \code{cond_vars}. For our example, we constrain the federal funds rate to a sharp rise and a subsequent drop. The forecasts are then calculated as usual, using the \code{predict()} method. We use the \code{plot()} method to visualize the conditional forecast. Figure~\ref{fig:app_predict_conditional} shows the constrained federal funds rate together with the unconstrained dynamics of GDP growth and inflation.

<<app_cond>>=
path <- c(2.25, 3, 4, 5.5, 6.75, 4.25, 2.75, 2, 2, 2)
predict(run_app) <- predict(run_app, horizon = 16,
  cond_path = path, cond_vars = "FEDFUNDS")
@

<<app_predict_conditional, fig=TRUE, include=FALSE>>=
plot(predict(run_app), t_back = 16)
@

\begin{figure}[ht]
\centering
\includegraphics[width=0.9\textwidth]{fig-app_predict_conditional.pdf}
\caption{Conditional forecasts for GDP growth, inflation, and the federal funds rate. Grey lines refer to the 68\% credible set.}
\label{fig:app_predict_conditional}
\end{figure}

\clearpage

\end{appendix}

%% -----------------------------------------------------------------------------

\end{document}