Skip to content

Commit ae5df0d

Browse files
Merge pull request #3 from SciML/myb/dae_adjoint
Add DAE adjoint write-up
2 parents d87aa69 + 3b49d7f commit ae5df0d

File tree

5 files changed

+277
-0
lines changed

5 files changed

+277
-0
lines changed

dae_adjoint/Makefile

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
LL := latexmk
2+
DEP := $(wildcard *.tex)
3+
MAIN=main
4+
5+
main: ${DEP}
6+
${LL} -f -pdf ${MAIN} -auxdir=output -outdir=output

dae_adjoint/README.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
# Derivation of semi-explicit DAE adjoint
2+
3+
The PDF is at output/main.pdf.

dae_adjoint/main.tex

Lines changed: 258 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,258 @@
1+
% main.tex
2+
\documentclass[a4paper,9pt]{article}
3+
\usepackage{amsmath,amssymb,amsthm,amsbsy,amsfonts}
4+
\usepackage{systeme}
5+
\usepackage{physics}
6+
\usepackage{cleveref}
7+
\newcommand{\correspondsto}{\;\widehat{=}\;}
8+
\usepackage{bm}
9+
\usepackage{enumitem} % label enumerate
10+
\newtheorem{theorem}{Theorem}
11+
\theoremstyle{definition}
12+
\newtheorem{definition}{Definition}[section]
13+
\theoremstyle{remark}
14+
\newtheorem*{remark}{Remark}
15+
% change Q.D.E symbol
16+
\renewcommand\qedsymbol{$\hfill \mbox{\raggedright \rule{0.1in}{0.2in}}$}
17+
18+
\begin{document}
19+
20+
\author{Yingbo Ma\\
21+
\tt{mayingbo5@gmail.com}}
22+
\title{Sensitivity of differential-algebraic equations}
23+
\date{March 12, 2020}
24+
25+
\maketitle
26+
27+
\section{Notation}
28+
\begin{itemize}
29+
\item $M \in \mathbb{C}^{m\times m}$ denotes a mass matrix.
30+
31+
\item $t_0 < t_1\in \mathbb{C}$ denote constants.
32+
33+
\item $t \in \mathbb{C}$ denotes independent variable.
34+
35+
\item $p\in \mathbb{C}^n$ denotes parameters.
36+
37+
\item $u(p, t): \mathbb{C}^n \times \mathbb{C}\mapsto \mathbb{C}^m$ denotes
38+
states.
39+
40+
\item $f\qty(u(p, t), p, t): \mathbb{C}^m \times \mathbb{C}^n \times
41+
\mathbb{C}\mapsto \mathbb{C}^{m}$ denotes the right-hand-side of a
42+
differential equation.
43+
44+
\item $S(p, t): \mathbb{C}^n \times \mathbb{C}\mapsto \mathbb{C}^{m\times n}$
45+
denotes $\pdv{u}{p}$ (sensitivity with respect to the states).
46+
47+
\item $J\qty(u(p, t), p, t): \mathbb{C}^m \times \mathbb{C}^n \times
48+
\mathbb{C} \mapsto \mathbb{C}^{m\times m}$ denotes $\pdv{f}{u}$ (Jacobian of
49+
the differential equation with respect to the states).
50+
51+
\item $g(u(p, t), p, t): \mathbb{C}^m \times \mathbb{C}^n \times \mathbb{C}
52+
\mapsto \mathbb{C}$ is a cost function that is sufficiently smooth.
53+
54+
\item $\lambda(u(p, t), p, t, t_0, t_1): \mathbb{C}^m \times \mathbb{C}^n
55+
\times \mathbb{C}\times \mathbb{C}\times \mathbb{C} \mapsto \mathbb{C}^n$
56+
denotes Lagrange multiplier.
57+
58+
\item $\lambda_\tau$ denotes $\pdv{\lambda}{\tau}$.
59+
\end{itemize}
60+
61+
\section{Introduction and Forward Sensitivity Analysis}
62+
In many applications, we may wish to compute the gradient of the continuous cost
63+
function
64+
\begin{equation}
65+
G[u] = \int_{t_0}^{t_1} g(u(p, t), p, t) \dd{t}
66+
\end{equation}
67+
with respect to the parameters $\dv{G}{p}$, where $u$ is a function that
68+
satisfies the differential-algebraic equation
69+
\begin{align} \label{eq:de}
70+
M\dot{u} &= f(u(p, t), p, t), \\
71+
u(t_0) &= u_0(p).
72+
\end{align}
73+
Na\"ively, we could apply Leibniz rule for integration and obtain:
74+
\begin{align}
75+
\dv{G}{p} &= \dv{p} \int_{t_0}^{t_1} g(u(p, t), p) \dd{t} \\
76+
&= \int_{t_0}^{t_1} \dv{p} g(u(p, t), p) \dd{t} \\
77+
&= \int_{t_0}^{t_1} \pdv{g}{u}\pdv{u}{p} + \pdv{g}{p} \dd{t} \\
78+
&= \int_{t_0}^{t_1} \pdv{g}{u}S + \pdv{g}{p} \dd{t}
79+
\label{eq:dgdp}
80+
\end{align}
81+
We can get $S$ by differentiating \cref{eq:de} with respect to $p$ both sides,
82+
\begin{align}
83+
&\dv{p}\dv{t}Mu = \dv{p}f(u(p, t), p, t) \\
84+
\implies &M\dv{t}\dv{u}{p} = \pdv{f}{u}\dv{u}{p} + \pdv{f}{p} \\
85+
\implies &M\dot{S} = JS + \pdv{f}{p} \label{eq:forward_sens}.
86+
\end{align}
87+
\cref{eq:forward_sens} is often referred as the forward sensitivity equation.
88+
It is apparent that computing $S$ can be intractable when there are large number
89+
of parameters, because we would have to solve a $m\times n$ differential
90+
equation.
91+
92+
\section{Adjoint Sensitivity Analysis with Continuous Cost Function}
93+
To alleviate the computational cost of the forward sensitivity equation, we
94+
could add a ``$0$'' to $\dv{G}{p}$, and get:
95+
\begin{align}
96+
\dv{G}{p} = &\dv{I}{p} \\
97+
= &\int_{t_0}^{t_1} \pdv{g}{u}S + \pdv{g}{p} -
98+
\lambda^*\qty(\underbrace{M\dot{S} -
99+
JS - \pdv{f}{p}}_{0 \quad\cref{eq:forward_sens}}) \dd{t}.
100+
\end{align}
101+
The motivation of this step is to introduce $\dot{S}$ and an
102+
\textbf{\emph{arbitrary}} $\lambda$ function, and the hope is to use the classic
103+
technique of integration by parts to group the $S$ terms and choose the
104+
$\lambda$ function such that the gradient is \textbf{\emph{independent}} of
105+
$S$.
106+
107+
After integration by parts, the $\lambda^* M\dot{S}$ term is
108+
\begin{align}
109+
\int_{t_0}^{t_1} \lambda^* M\dot{S} \dd{t} = \eval{\lambda^* MS}_{t_0}^{t_1} -
110+
\int_{t_0}^{t_1} \dot{\lambda}^* MS \dd{t}.
111+
\end{align}
112+
The gradient expression after grouping is then:
113+
\begin{align} \label{eq:simplified_sens}
114+
\dv{G}{p} &= \int_{t_0}^{t_1} \pdv{g}{u}S + \pdv{g}{p}
115+
+ \dot{\lambda}^* MS + \lambda^* JS+ \lambda^* \pdv{f}{p} \dd{t}
116+
- \eval{\lambda^* MS}_{t_0}^{t_1} \nonumber \\
117+
&= \int_{t_0}^{t_1} \qty(\pdv{g}{u} + \dot{\lambda}^*M + \lambda^*
118+
J)S +\lambda^* \pdv{f}{p} + \pdv{g}{p} \dd{t}
119+
- \eval{\lambda^* MS}_{t_0}^{t_1}
120+
\end{align}
121+
It becomes obvious that we can impose the condition
122+
\begin{equation} \label{eq:cond}
123+
\pdv{g}{u} + \dot{\lambda}^*M + \lambda^* J = \bm{0} \qq{and}
124+
\lambda(t_1)^* M = \bm{0}^*,
125+
\end{equation}
126+
to make \cref{eq:simplified_sens} independent of $S$.
127+
128+
After rearranging \cref{eq:simplified_sens} and \cref{eq:cond}, we have
129+
\begin{align}
130+
M^*\dot{\lambda} &= -J^* \lambda - \pdv{g}{u}^*, \label{eq:adj_eq_cont}\\
131+
M^* \lambda(t_1) &= \bm{0} \label{eq:init_cont}\\
132+
\dv{G}{p} &= \int_{t_0}^{t_1} \lambda^* \pdv{f}{p} + \pdv{g}{p} \dd{t}
133+
+ \eval{\lambda^* MS}_{t=t_0}. \label{eq:sens_int}
134+
\end{align}
135+
Here, we want to remark that the $\eval{\lambda^* MS}_{t=t_0}$ term is zero if
136+
the initial condition of \cref{eq:de} is independent of the parameters.
137+
138+
\section{Adjoint Sensitivity Analysis with Discrete Cost Function}
139+
In many cases, we only want to compute sensitivity at specific a time point
140+
$\tau$, i.e. $\eval{\pdv{g(u(p, t), p, t)}{p}}_{t=\tau}$. In these cases, we
141+
call the cost function discrete.
142+
143+
To reuse our previous result, we need to find a way to relate $\dv{g}{p}$ to
144+
$\dv{G}{p}$. There are two options, using Dirac delta distribution or using
145+
Leibniz integration rule. We pick the latter because it is more straight
146+
forward.
147+
148+
Note we have
149+
\begin{align}
150+
\dv{p} \dv{\tau} G[u] &= \dv{p} \dv{\tau} \int_{t_0}^{\tau} g(u(p, t), p, t) \dd{t} \\
151+
&= \dv{p}\qty[g(u, p, \tau) + \int_{t_0}^{\tau}
152+
\pdv{g}{\tau} \dd{t}] \\
153+
&= \dv{g}{p} + \int_{t_0}^{\tau}
154+
\pdv{g}{\tau}{p} \dd{t}, \label{eq:dgdp1}
155+
\end{align}
156+
and
157+
\begin{align}
158+
\dv{\tau} \dv{p} G[u] &= \dv{\tau} \qty[\int_{t_0}^{\tau} \lambda^* \pdv{f}{p} +
159+
\pdv{g}{p} \dd{t} + \eval{\lambda^* MS}_{t=t_0}] \qq{\cref{eq:sens_int}}\\
160+
&= \eval{\qty(\lambda^* \pdv{f}{p} + \pdv{g}{p})}_{t=\tau} +
161+
\int_{t_0}^\tau \lambda_\tau^* \pdv{f}{p} +
162+
\underbrace{\lambda^* \pdv{f}{p}{\tau}}_{=0}\dd{t}
163+
+ \int_{t_0}^\tau \pdv{g}{\tau}{p} \dd{t} +
164+
\eval{\lambda_\tau^* MS}_{t=t_0}. \label{eq:dgdp2}
165+
\end{align}
166+
By comparing \cref{eq:dgdp1} and \cref{eq:dgdp2}, we can conclude that
167+
\begin{equation} \label{eq:sens_int_dis}
168+
\dv{g}{p} = \eval{\qty(\lambda^* \pdv{f}{p} + \pdv{g}{p})}_{t=\tau} +
169+
\int_{t_0}^\tau \lambda_\tau^* \pdv{f}{p} \dd{t} +
170+
\eval{\lambda_\tau^* MS}_{t=t_0}.
171+
\end{equation}
172+
173+
Now we need $\lambda_\tau$ to compute the sensitivity integral
174+
\cref{eq:sens_int_dis}. It can be obtained by differentiating
175+
\cref{eq:adj_eq_cont},
176+
\begin{align}
177+
M^*\dot{\lambda_\tau} &= -J^* \lambda_\tau - \pdv{g}{\tau} \\
178+
&= -J^* \lambda_\tau \qq{$g$ does not
179+
depend on $\tau$}. \label{eq:adj_eq_dis}
180+
\end{align}
181+
182+
We want to remark that from \cref{eq:adj_eq_dis}, we can see that the
183+
initialization of $\lambda_\tau$ cannot be trivial, since
184+
$\lambda_\tau(\tau) = \bm{0}$ can only result in uninteresting dynamics.
185+
186+
It is important to note that $\lambda$ dependents on not only $t$ but also
187+
$\tau$ with a discrete cost function, since $\lambda_\tau$ is non-zero.
188+
189+
To obtain the initialization of $\eval{\lambda_\tau}_{t=\tau}$, we can
190+
differentiate $\eval{\lambda(t, \tau)}_{t=\tau}$ in the constraint
191+
\cref{eq:init_cont} with respect to $\tau$,
192+
\begin{equation}
193+
\dv{\tau}\qty(\eval{\lambda(t, \tau)^* M}_{t=\tau}) =
194+
\qty(\underbrace{
195+
\eval{\pdv{\lambda}{t}\pdv{t}{\tau}}_{t=\tau}}
196+
_{\dot{\lambda}})^*
197+
M + \eval{\lambda_\tau^* M}_{t=\tau} = \bm{0}^*.
198+
\end{equation}
199+
Hence, we have
200+
\begin{align}
201+
\eval{\lambda_\tau^* M}_{t=\tau}
202+
&= -\eval{\dot{\lambda}^* M}_{t=\tau} \\
203+
&= \eval{\qty(J^* \lambda + \pdv{g}{u}^*)^*}_{t=\tau} \qq{\cref{eq:adj_eq_cont}}.
204+
\end{align}
205+
206+
Together, the system of equations is,
207+
\begin{align}
208+
M^*\dot{\lambda_\tau} &= -J^* \lambda_\tau \\
209+
\eval{M^* \lambda_\tau}_{t=\tau} &= \eval{\qty(J^* \lambda +
210+
\pdv{g}{u}^*)}_{t=\tau} \\
211+
\dv{g}{p} &= \eval{\qty(\lambda^* \pdv{f}{p} + \pdv{g}{p})}_{t=\tau} +
212+
\int_{t_0}^\tau \lambda_\tau^* \pdv{f}{p} \dd{t} +
213+
\eval{\lambda_\tau^* MS}_{t=t_0}.
214+
\end{align}
215+
216+
\subsection{Example: Index-I differential-algebraic equation}
217+
To handle singular mass matrix, we need to split the problem system into
218+
differential variables $\{\cdot\}^d$ and algebraic
219+
variables $\{\cdot\}^a$, so that $\widetilde{M}$ is fully ranked,
220+
\begin{align}
221+
\mqty(\widetilde{M} & 0 \\ 0 & 0) \mqty(\dot{u}^d\\ \dot{u}^a) &= \mqty(f(u^d,
222+
u^a, p, t)
223+
\\ h(u^d, u^a, p, t)) \\
224+
u^d(t_0) &= u_{d0}(p).
225+
\end{align}
226+
We have
227+
\begin{equation}
228+
J^* = \mqty(\pdv{f}{u^d}^* & \pdv{h}{u^d}^* \\ \pdv{f}{u^a}^* &
229+
\pdv{h}{u^a}^*).
230+
\end{equation}
231+
The initialization step is
232+
\begin{equation}
233+
\mqty(\widetilde{M}^* & 0 \\ 0 & 0) \mqty(\lambda_\tau^d\\ \lambda_\tau^a) =
234+
\mqty(\pdv{f}{u^d}^* & \pdv{h}{u^d}^* \\ \pdv{f}{u^a}^* &
235+
\pdv{h}{u^a}^*) \mqty(\bm{0}\\ \lambda^a) + \mqty(\pdv{g^d}{u}^* \\
236+
\pdv{g^a}{u}^*).
237+
\end{equation}
238+
Expanding the equation we have
239+
\begin{align}
240+
\widetilde{M}^* \lambda_\tau^d &= \pdv{h}{u^d}^* \lambda^a + \pdv{g^d}{u}^*
241+
\label{eq:ex1_sys1} \\
242+
\bm{0} &= \pdv{h}{u^a}^* \lambda^a + \pdv{g^a}{u}^* \implies
243+
\lambda^a = -\qty(\pdv{h}{u^a}^*)^{-1}\pdv{g^a}{u}^* \label{eq:ex1_sys2}
244+
\end{align}
245+
Plugging \cref{eq:ex1_sys2} into \cref{eq:ex1_sys1}, we obtain the
246+
initialization of $\lambda_\tau$
247+
\begin{equation}
248+
\widetilde{M}^* \lambda_\tau^d(\tau) = \eval{\qty(-\pdv{h}{u^d}^*
249+
\qty(\pdv{h}{u^a}^*)^{-1}\pdv{g^a}{u}^* +
250+
\pdv{g^d}{u}^*)}_{t=\tau}.
251+
\end{equation}
252+
253+
\nocite{cao2003adjoint}
254+
255+
\bibliography{reference.bib}
256+
\bibliographystyle{siam}
257+
258+
\end{document}

dae_adjoint/output/main.pdf

186 KB
Binary file not shown.

dae_adjoint/reference.bib

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
@article{cao2003adjoint,
2+
title={Adjoint sensitivity analysis for differential-algebraic equations: The adjoint DAE system and its numerical solution},
3+
author={Cao, Yang and Li, Shengtai and Petzold, Linda and Serban, Radu},
4+
journal={SIAM journal on scientific computing},
5+
volume={24},
6+
number={3},
7+
pages={1076--1089},
8+
year={2003},
9+
publisher={SIAM}
10+
}

0 commit comments

Comments
 (0)