-
Notifications
You must be signed in to change notification settings - Fork 121
/
chapterBasicDataStructures.tex
208 lines (173 loc) · 6.19 KB
/
chapterBasicDataStructures.tex
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
\chapter{Basic Data Structures}
\section{Introduction}
Abstract Data Types (ADT):
\begin{enumerate}
\item Queue
\item Stack
\item HashMap
\end{enumerate}
Implementation (for both queue and stack):
\begin{enumerate}
\item Linked List
\item Resizing Array:
\begin{enumerate}
\item Doubling: when full (100\%).
\item Halfing: when one-quarter full (25\%).
\end{enumerate}
\end{enumerate}
Python Library:
\begin{enumerate}
\item \pythoninline{collections.deque} \footnote{The lowercase and uppercase naming in Python collections are awkward: \href{http://stackoverflow.com/questions/18953681/naming-convention-in-collections-why-are-some-lowercase-and-others-capwords}{discussion}.}
\item \pythoninline{list}
\item \pythoninline{dict, OrderedDict, defaultdict}
\end{enumerate}
Java Library:
\begin{enumerate}
\item \javainline{java.util.Stack<E>}
\item \javainline{java.util.LinkedList<E>}
\item \javainline{java.util.HashMap<K, V>; java.util.TreeMap<K, V>}
\end{enumerate}
\section{Python Library}
\begin{python}
from collections import defaultdict
counter = defaultdict(int)
G = defaultdict(list) # graph of v -> neighbors
G = defaultdict(dict) # G[s][e] -> weight
from collections import deque
path = deque()
path.appendleft("a")
path.append("b")
path.popleft()
path.pop()
path = deque(sorted(path))
import heapq
l = [] # list
heapq.heappush(l, "a")
heapq.heappop(l)
heapq.heapify(l)
heapq.heappushpop(l, "b")
heapq.nsmallest(3, l)
\end{python}
\section{Stack}
\subsection{Stack and Recursion}
How a compiler implements a function:
\begin{enumerate}
\item Function call: push local environment and return address
\item Return: pop return address and local environment.
\end{enumerate}
Recursive function: function calls itself. It can always be implemented by using an explicit stack to remove recursion.
Stack can convert recursive DFS to iterative.
\subsection{Usage}
The core philosophy of using stack is to maintain a relationship invariant among stack element.
The \textbf{relationship invariants} can be:
\begin{enumerate}
\item strictly asc/ strictly desc
\item non-desc/ non-asc
\end{enumerate}
\subsection{Unpaired Parentheses}
\runinhead{Longest Valid Parentheses.}
Given a string containing just the characters `(' and `)', find the length of the longest valid (well-formed) parentheses substring. Core clues:
\begin{enumerate}
\item \textbf{Invariant}: Stack holds the INDEX of UNPAIRED brackets, either ( or ).
\item Thus, \pyinline{stk[-1]} stores the last unpaired bracket.
\item The length of the well-formed parentheses is: if currently \textbf{valid}, current scanning index \pyinline{idx} minus the last \textbf{invalid} index of bracket \pyinline{stk[-1]}
\end{enumerate}
\begin{python}
def longestValidParentheses(self, s):
stk = []
maxa = 0
for idx, val in enumerate(s):
if val == ")" and stk and s[stk[-1]] == "(":
stk.pop()
if not stk:
maxa = max(maxa, idx+1)
else:
maxa = max(maxa, idx-stk[-1])
else:
stk.append(idx)
return maxa
\end{python}
\subsection{Nearest smaller value}\label{allNearestSmaller}
\textbf{Nearest smaller value for each}. Left neighbor of a value $v$ to be the value that occurs prior to $v$, is smaller than $v$, and is closer in position to $v$ than any other smaller value.
For each position in a sequence of numbers, search among the \textit{previous} positions for the last position that contains a smaller value.
\rih{Core clues:}
\begin{enumerate}
\item Nearest $\equiv$ spatial locality.
\item \textbf{Invariant}: Maintain a \textit{strictly increasing} stack.
\item If the question asks for all nearest \textit{larger} values, maintain a \textit{strictly decreasing} stack.
\end{enumerate}
\begin{python}
def allNearestSmaller(self, A):
P = [-1 for _ in A]
stk = []
for i, v in enumerate(A):
while stk and A[stk[-1]] >= v:
stk.pop()
if stk:
P[i] = stk[-1]
else:
P[i] = -1 # no preceding smaller value
stk.append(i) # store the idx or val
return P
\end{python}
\subsection{Non-Decreasing Stack}
\runinhead{Largest Rectangle.} Find the largest rectangle in the matrix (histogram). Given $n$ non-negative integers representing the histogram's bar height where the width of each bar is 1, find the area of largest rectangle in the histogram.
\begin{figure}[]
\centering
\subfloat{\includegraphics[width=0.5\linewidth]{histogram_area}}
\caption{Largest rectangle in histogram\\ $i \ra$ height 2, $cur \ra$ height 6, 5, 1}
\label{fig:histogram_area}
\end{figure}
Keep a stack storing the bars in non-decreasing, then calculate the area by popping out the stack to get the currently lowest bar which determines the height of the rectangle.
\\
\rih{Core clues:}
\begin{enumerate}
\item \textbf{Invariant}: Maintain the non-decreasing stack, using INDEX.
\item Popping triggers the calculation of area
\item Calculate the rectangle width by index diff
\item Post-processing in the end
\end{enumerate}
Code:
\begin{python}
def largestRectangleArea(self, heights):
n = len(heights)
gmax = -sys.maxsize-1
stk = [] # store the idx, non-decreasing stack
for i in range(n):
while stk and height[stk[-1]] > height[i]:
cur = stk.pop()
# calculate area when popping
if stk:
# lower bound stk[-1] + 1
# higher bound i
l = i-(stk[-1]+1)
else:
l = i
area = height[cur]*l
gmax = max(gmax, area)
stk.append(i)
# after array scan, process the dangling stack
i = n
...
return gmax
\end{python}
\section{Map}
\subsection{Math relations}
\textbf{1-1 Map}. Mathematically, full projection. One map, dual entries.
\begin{python}
class OneToOneMap:
def __init__(self):
self.m = {} # keep a single map
def set(self, a, b):
self.m[a] = b
self.m[b] = a
def get(self, a):
return self.m.get(a)
\end{python}
\subsection{Operations}
\runinhead{Sorting by value.} Sort the map entries by values \pyinline{itemgetter}.
\begin{python}
from operators import itemgetter
sorted(hm.items(), key=itemgetter(1), reverse=True)
sorted(hm.items(), key=lambda x: x[1], reverse=True)
\end{python}