-
Notifications
You must be signed in to change notification settings - Fork 0
/
algorithm.tex
147 lines (138 loc) · 6.59 KB
/
algorithm.tex
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
\begin{algorithm}
\begin{algorithmic}[1]
\Require{$H = ((S, p), \Obs, o)$ : an HMM;
$\pi, \tau$ : state distributions on $S$; $c$ : a
non-negative real number; $k$ : a positive integer}
\Ensure{An SMT query $q$ such that $q$ is unsatisfiable iff
$\Pr (\overline{\omega} | \pi, H) \leq c \cdot
\Pr (\overline{\omega} | \tau, H)$ for every observation
sequences $\overline{\omega}$ of length $k$}
\Function{PufferfishCheck}{$H$, $\pi_0$, $\pi_1$, $c$, $k$}
\For{$s \in S$}
\State{$\alpha_0(s) \leftarrow
\textmd{\textsc{Product}} (\pi(s),
\textmd{\textsc{Select}}(\mathsf{w_0}, \Omega, o(s, \bullet)))$}
\State{$\beta_0(s) \leftarrow
\textmd{\textsc{Product}} (\tau(s),
\textmd{\textsc{Select}}(\mathsf{w_0}, \Omega, o(s, \bullet)))$}
\EndFor
\For{$t \leftarrow 1$ \textbf{to} $k - 1$}
\For{$s' \in S$}
\State{$\alpha_t(s') \leftarrow
\textmd{\textsc{Product}} (
\textmd{\textsc{Dot}} (\alpha_{t-1}, p (\bullet, s')),
\textmd{\textsc{Select}} (\mathsf{w_t}, \Omega, o(s', \bullet)))
$}
\State{$\beta_t(s') \leftarrow
\textmd{\textsc{Product}} (
\textmd{\textsc{Dot}} (\beta_{t-1}, p (\bullet, s')),
\textmd{\textsc{Select}} (\mathsf{w_t}, \Omega, o(s', \bullet)))
$}
\EndFor
\EndFor
\State{\Return{
$\bigwedge_{t=0}^{k-1} \mathsf{w_t} \in \Obs \wedge
\textmd{\textsc{Gt}} (
\textmd{\textsc{Sum}} (\alpha_{k-1}),
\textmd{\textsc{Product}} (c,
\textmd{\textsc{Sum}} (\beta_{k-1}))
)$
}}
\EndFunction
\end{algorithmic}
\caption{Pufferfish Check}
\label{algorithm:pufferfish-check}
\end{algorithm}
Our next goal is to develop an algorithm to verify
$\epsilon$-Pufferfish privacy on any given HMM. Since the problem is
NP-hard, we will employ Satisfiability Modulo Theories (SMT) solvers
in our algorithm. For all observation sequences of length $k$, we will
construct an SMT query to find a sequence violating
$\epsilon$-Pufferfish privacy. If no such sequence can be found, the
given HMM satisfies $\epsilon$-Pufferfish privacy for observation
sequences of length $k$.
Let $H = ((S, p), \Obs, o)$ be an HMM, $\pi, \tau$ two initial state
distributions on $S$, $c \geq 0$ a real number, and $k$ a positive
integer. For a fixed observation sequence $\overline{\obs}$,
computing the probability $\Pr (\overline{\obs} | \pi,
H)$ of observing $\overline{\obs}$ from $\pi$ can be done in
polynomial time~\cite{R:89:ATHMM}. It is straightforward to check if
$\Pr (\overline{\obs} | \pi, H) > c \cdot \Pr (\overline{\obs} |
\tau, H)$ for any fixed observation sequence $\overline{\obs}$. One
simply computes the respective probabilities and then checks the
inequality.
Our algorithm exploits the efficient algorithm for computing the
probability of observation sequences. Rather than a fixed observation
sequence, we declare $k$ SMT variables $\mathsf{w_0}, \mathsf{w_1},
\ldots, \mathsf{w_{k-1}}$ for observations at each step. The
observation at each step is determined by one of the $k$ variables. To
this end, we define the SMT expression
$\textmd{\textsc{Select}} (\mathsf{w}, \{ \obs_1, \obs_2, \ldots,
\obs_m\}, o (s, \bullet))$ equal to $o (s, \obs)$ when the SMT
variable $\mathsf{w}$ is $\obs$. It is easy to formulate by
the SMT $\mathsf{ite}$ (if-then-else) expression:
\begin{align*}
\mathsf{ite} (\mathsf{w} = \obs_1, o (s, \obs_1),
\mathsf{ite} (\mathsf{w} = \obs_2, o (s, \obs_2),
\ldots
,\mathsf{ite} (\mathsf{w} = \obs_m, o (s, \obs_m), \mathsf{w})
\ldots))
\end{align*}
Using $\textmd{\textsc{Select}} (\mathsf{w}, \{ \obs_1, \obs_2,
\ldots, \obs_m\}, o (s, \bullet))$, it is easy to construct an SMT
expression to compute $\Pr (\overline{\mathsf{w}} | \pi, H)$ where
$\overline{\mathsf{w}}$ is a sequence of SMT variables ranging over
the observations $\Obs$ (Algorithm~\ref{algorithm:pufferfish-check}).
Recall the equations (\ref{hmm:basis}) and (\ref{hmm:inductive}). We
simply replace the expression $o (s, \obs)$ with
$\textmd{\textsc{Select}} (\mathsf{w}, \{ \obs_1, \obs_2, \ldots,
\obs_m\}, o (s, \bullet))$ to leave the observation determined by the
SMT variable $\mathsf{w}$. In the algorithm, we also use auxiliary
functions.
$\textmd{\textsc{Product}}(\mathit{smtExp}_0, \mathit{smtExp}_1,
\ldots, \mathit{smtExp}_m)$
returns the SMT expression denoting the product of
$\mathit{smtExp}_0$, $\mathit{smtExp}_1, \ldots,$
$\mathit{smtExp}_m$. Similarly,
$\textmd{\textsc{Sum}}(\mathit{smtExp}_0,$ $\mathit{smtExp}_1, \ldots,$
$\mathit{smtExp}_m)$ returns the SMT expression for the sum
of $\mathit{smtExp}_0$, $\mathit{smtExp}_1, \ldots,$
$\mathit{smtExp}_m$. $\textmd{\textsc{Gt}} (\mathit{smtExp}_0,
\mathit{smtExp}_1)$ returns the SMT expression for
$\mathit{smtExp}_0$ greater than $\mathit{smtExp}_1$.
Finally,
$\textmd{\textsc{Dot}} ([\mathsf{a_0}, \mathsf{a_1}, \ldots,$
$\mathsf{a_n}], [\mathsf{b_0}, \mathsf{b_1}, \ldots, \mathsf{b_n}])$
returns the SMT expression for the inner product of the two lists of
SMT expressions, namely,
\[
\textmd{\textsc{Sum}} (
\textmd{\textsc{Product}}(\mathsf{a_0}, \mathsf{b_0}),
\textmd{\textsc{Product}}(\mathsf{a_1}, \mathsf{b_1}),
\ldots,
\textmd{\textsc{Product}}(\mathsf{a_n}, \mathsf{b_n})
).
\]
Algorithm~\ref{algorithm:pufferfish-check} is summarized in the
following theorem.
\begin{theorem}
Let $H = ((S, p), \Obs, o)$ be an HMM, $\pi, \tau$ state
distributions on $S$, $c > 0$ a real number, and $k > 0$ an
integer. Algorithm~\ref{algorithm:pufferfish-check} returns an SMT
query such that the query is unsatisfiable iff
$\Pr (\overline{\omega} | \pi, H) \leq c \cdot
\Pr (\overline{\omega} | \tau, H)$ for every observation
sequences $\overline{\omega}$ of length $k$.
\end{theorem}
We have implemented our algorithm using \zpython. All examples in
Section~\ref{section:hmm} are checked with our implementation. In
\textit{Example~4}, contracting the disease is independent with
probability $p > 0$. Even though the query contains non-linear
constraints over real numbers, \zpython still proves that the
$\frac{1}{2}$-geometric mechanism satisfies $\ln(2)$-Pufferfish
privacy for any probability $0 < p < 1$. In \textit{Example~5}, it is
assumed that the probability of $m$ individuals contracting the
disease is $p_m > 0$ for $m = 0, 1, 2$. The probabilities $(p_0, p_1,
p_2) = (\frac{1}{2}, \frac{1}{4}, \frac{1}{4})$ are in fact found by
\zpython. The SMT solver is able to find a non-binomial initial state
distribution to satisfy $\ln(2)$-Pufferfish privacy.