% chapterBasicDataStructures.tex

\chapter{Basic Data  Structures}


\section{Introduction}
Abstract Data Types (ADT):
\begin{enumerate}
\item Queue
\item Stack
\item HashMap
\end{enumerate}
Implementation (for both queue and stack):
\begin{enumerate}
\item Linked List
\item Resizing Array:
\begin{enumerate}
\item Doubling: when full (100\%).
\item Halving: when one-quarter full (25\%).
\end{enumerate}
\end{enumerate}
Python Library:
\begin{enumerate}
\item \pythoninline{collections.deque}\footnote{The mix of lowercase and CapWords names in Python's collections module is awkward: \href{http://stackoverflow.com/questions/18953681/naming-convention-in-collections-why-are-some-lowercase-and-others-capwords}{discussion}.}
\item \pythoninline{list}
\item \pythoninline{dict, OrderedDict, defaultdict}
\end{enumerate}
Java Library:
\begin{enumerate}
\item \javainline{java.util.Stack<E>}
\item \javainline{java.util.LinkedList<E>}
\item \javainline{java.util.HashMap<K, V>; java.util.TreeMap<K, V>}
\end{enumerate}

\section{Python Library}
\begin{python}
# defaultdict: a missing key is created on first access by calling the factory.
from collections import defaultdict
counter = defaultdict(int)  # missing keys start at int() == 0
G = defaultdict(list)  # graph of v -> neighbors
G = defaultdict(dict)  # G[s][e] -> weight (alternative layout; rebinds G)


# deque: double-ended queue, usable as both a queue and a stack.
from collections import deque
path = deque()
path.appendleft("a")  # add at the left end
path.append("b")  # add at the right end
path.popleft()  # remove and return the leftmost item
path.pop()  # remove and return the rightmost item
path = deque(sorted(path))  # rebuild the deque from a sorted list of its items


# heapq: min-heap operations on top of a plain list.
import heapq
l = []  # plain list used as the heap storage
heapq.heappush(l, "a")  # insert while keeping the heap property
heapq.heappop(l)  # remove and return the smallest item
heapq.heapify(l)  # turn an arbitrary list into a heap, in place
heapq.heappushpop(l, "b")  # push "b", then pop and return the smallest item
heapq.nsmallest(3, l)  # list of the (up to) 3 smallest items
\end{python}

\section{Stack}
\subsection{Stack and Recursion}
How a compiler implements a function:
\begin{enumerate}
\item Function call: push local environment and return address
\item Return: pop return address and local environment. 
\end{enumerate}

Recursive function: function calls itself. It can always be implemented by using an explicit stack to remove recursion. 

Stack can convert recursive DFS to iterative. 

\subsection{Usage}
The core philosophy of using a stack is to maintain a relationship invariant among the stack elements.

The \textbf{relationship invariants} can be:
\begin{enumerate}
\item strictly asc/ strictly desc
\item non-desc/ non-asc
\end{enumerate}

\subsection{Unpaired Parentheses}
\runinhead{Longest Valid Parentheses.} 
Given a string containing just the characters `(' and `)', find the length of the longest valid (well-formed) parentheses substring. Core clues:
\begin{enumerate}
\item \textbf{Invariant}: Stack holds the INDEX of UNPAIRED brackets, either ( or ).
\item Thus, \pyinline{stk[-1]} stores the last unpaired bracket. 
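\item The length of the well-formed parentheses: when the bracket at the current scanning index \pyinline{idx} has just been paired (currently \textbf{valid}), the length is \pyinline{idx} minus the index of the last unpaired (\textbf{invalid}) bracket \pyinline{stk[-1]}; if the stack is empty, the whole prefix is valid and the length is \pyinline{idx+1}.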
\end{enumerate}
\begin{python}
def longestValidParentheses(self, s):
  """Return the length of the longest well-formed parentheses substring of s.

  The stack keeps the indices of brackets that are still unpaired, so its
  top is always the last position at which a valid run was broken.
  """
  unpaired = []
  best = 0
  for pos, ch in enumerate(s):
    closes_pair = ch == ")" and unpaired and s[unpaired[-1]] == "("
    if not closes_pair:
      # an opening bracket, or a closing one with nothing to match
      unpaired.append(pos)
      continue

    unpaired.pop()
    # the valid run starts right after the last unpaired index;
    # -1 stands for "no unpaired bracket before pos"
    last_bad = unpaired[-1] if unpaired else -1
    best = max(best, pos - last_bad)

  return best

\end{python}

\subsection{Nearest smaller value}\label{allNearestSmaller}
\textbf{Nearest smaller value for each}. Define the left neighbor of a value $v$ to be the value that occurs prior to $v$, is smaller than $v$, and is closer in position to $v$ than any other smaller value.

For each position in a sequence of numbers, search among the \textit{previous} positions for the last position that contains a smaller value. 

\rih{Core clues:}
\begin{enumerate}
\item Nearest $\equiv$ spatial locality.
\item \textbf{Invariant}: Maintain a \textit{strictly increasing} stack.  
\item If the question asks for all nearest \textit{larger} values, maintain a \textit{strictly decreasing} stack.  
\end{enumerate}

\begin{python}
def allNearestSmaller(self, A):
    """For each position of A, find the nearest previous smaller element.

    Returns a list with one entry per element of A: the index of the
    closest earlier element that is strictly smaller, or -1 if there is
    no preceding smaller value.
    """
    nearest = []
    increasing = []  # indices whose values are strictly increasing
    for pos, cur in enumerate(A):
        # values >= cur can never be the answer for cur or anything after it
        while increasing and A[increasing[-1]] >= cur:
            increasing.pop()

        nearest.append(increasing[-1] if increasing else -1)
        increasing.append(pos)  # store the idx or val

    return nearest
\end{python}


\subsection{Non-Decreasing Stack}
\runinhead{Largest Rectangle.} Find the largest rectangle in the matrix (histogram). Given $n$ non-negative integers representing the histogram's bar heights, where the width of each bar is 1, find the area of the largest rectangle in the histogram.

\begin{figure}[htbp]
\centering
\subfloat{\includegraphics[width=0.5\linewidth]{histogram_area}}
\caption{Largest rectangle in histogram\\ $i \ra$ height 2, $cur \ra$ height 6, 5, 1}
\label{fig:histogram_area}
\end{figure}

Keep a stack storing the bars in non-decreasing order, then calculate the area by popping the stack to get the currently lowest bar, which determines the height of the rectangle.

\rih{Core clues:}
\begin{enumerate}
\item \textbf{Invariant}: Maintain the non-decreasing stack, using INDEX.
\item Popping triggers the calculation of area
\item Calculate the rectangle width by index diff
\item Post-processing in the end
\end{enumerate}

Code:
\begin{python}
def largestRectangleArea(self, heights):
    """Return the area of the largest rectangle in a histogram.

    heights holds non-negative bar heights, each bar having width 1.
    The stack keeps bar indices with non-decreasing heights; popping a
    bar means its rectangle cannot extend further right, so its area is
    computed at that moment. Returns 0 for an empty histogram.
    """
    n = len(heights)
    gmax = 0  # heights are non-negative, so 0 is a safe lower bound
    stk = []  # store the idx, non-decreasing stack

    for i in range(n):
        while stk and heights[stk[-1]] > heights[i]:
            cur = stk.pop()
            # calculate area when popping
            if stk:
                # lower bound stk[-1] + 1
                # higher bound i
                l = i - (stk[-1] + 1)
            else:
                # nothing lower to the left: spans from index 0 to i
                l = i

            area = heights[cur] * l
            gmax = max(gmax, area)

        stk.append(i)

    # after array scan, process the dangling stack:
    # every remaining bar extends to the right end of the histogram
    i = n
    while stk:
        cur = stk.pop()
        if stk:
            l = i - (stk[-1] + 1)
        else:
            l = i

        area = heights[cur] * l
        gmax = max(gmax, area)

    return gmax
\end{python}

\section{Map}
\subsection{Math relations}
\textbf{1-1 Map}. Mathematically, a bijection (one-to-one correspondence). One map, dual entries: each pair is stored in both directions.
\begin{python}
class OneToOneMap:
    """Bidirectional one-to-one map backed by a single dict.

    Each pair (a, b) is stored twice, as a -> b and b -> a, so either
    side can be looked up with get().
    """

    def __init__(self):
        self.m = {}  # keep a single map

    def set(self, a, b):
        """Pair a with b, replacing any pairing either of them had before."""
        # Drop stale entries first; otherwise re-pairing a key would leave
        # its old partner still pointing back at it.
        for key in (a, b):
            if key in self.m:
                partner = self.m.pop(key)
                self.m.pop(partner, None)

        self.m[a] = b
        self.m[b] = a

    def get(self, a):
        """Return the partner of a, or None if a is not paired."""
        return self.m.get(a)
\end{python}
\subsection{Operations}
\runinhead{Sorting by value.} Sort the map entries by value, using \pyinline{itemgetter} or an equivalent \pyinline{lambda}.
\begin{python}
from operator import itemgetter
# hm is the map to sort; both calls return a list of (key, value) pairs
# ordered by value, largest first.
sorted(hm.items(), key=itemgetter(1), reverse=True)
sorted(hm.items(), key=lambda x: x[1], reverse=True)
\end{python}