forked from kamyu104/LeetCode-Solutions
-
Notifications
You must be signed in to change notification settings - Fork 0
/
top-k-frequent-words.py
126 lines (109 loc) · 3.6 KB
/
top-k-frequent-words.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
# Time: O(n + klogk) on average
# Space: O(n)
import collections
import heapq
from random import randint
class Solution(object):
    """Quick-select based solution (average O(n + k log k) time, O(n) space)."""

    def topKFrequent(self, words, k):
        """
        Return the k most frequent words, most frequent first.

        Words with equal frequency are ordered alphabetically. If k exceeds
        the number of distinct words, every distinct word is returned; a
        non-positive k yields an empty list.

        :type words: List[str]
        :type k: int
        :rtype: List[str]
        """
        counts = collections.Counter(words)
        # Negating the count makes plain ascending tuple order mean
        # "higher frequency first, then alphabetical".
        pairs = [(-count, word) for word, count in counts.items()]
        if k <= 0:
            return []
        # Selection is only needed when some candidates must be discarded.
        if k < len(pairs):
            self.kthElement(pairs, k)
        return [word for _, word in sorted(pairs[:k])]

    def kthElement(self, nums, k):  # O(n) on average
        """
        Reorder nums in place so that nums[k - 1] holds its k-th smallest item.

        On return (for 1 <= k <= len(nums)) every item in nums[:k - 1] is
        less than or equal to nums[k - 1], and every item in nums[k:] is
        greater than or equal to it. Neither side is sorted.

        :type nums: List
        :type k: int
        :rtype: None
        """
        def partition_around_pivot(left, right, pivot_idx, nums):
            """Partition nums[left:right + 1] and return the pivot's final index."""
            pivot_value = nums[pivot_idx]
            new_pivot_idx = left
            # Park the pivot at the right edge while the rest is partitioned.
            nums[pivot_idx], nums[right] = nums[right], nums[pivot_idx]
            for i in range(left, right):
                if nums[i] < pivot_value:
                    nums[i], nums[new_pivot_idx] = nums[new_pivot_idx], nums[i]
                    new_pivot_idx += 1
            # Drop the pivot between the "smaller" and "not smaller" parts.
            nums[right], nums[new_pivot_idx] = nums[new_pivot_idx], nums[right]
            return new_pivot_idx

        left, right = 0, len(nums) - 1
        while left <= right:
            # A random pivot keeps the expected running time linear.
            pivot_idx = randint(left, right)
            new_pivot_idx = partition_around_pivot(left, right, pivot_idx, nums)
            if new_pivot_idx == k - 1:
                return
            if new_pivot_idx > k - 1:
                right = new_pivot_idx - 1
            else:  # new_pivot_idx < k - 1.
                left = new_pivot_idx + 1
# Time: O(nlogk)
# Space: O(n)
# Heap Solution
class Solution2(object):
    """Bounded min-heap solution (O(n log k) time, O(n) space)."""

    def topKFrequent(self, words, k):
        """
        Return the k most frequent words, most frequent first.

        Words with equal frequency are ordered alphabetically. A
        non-positive k yields an empty list.

        :type words: List[str]
        :type k: int
        :rtype: List[str]
        """
        class MinHeapObj(object):
            """Wrap a (count, word) pair so the heap root is the worst candidate kept."""

            def __init__(self, val):
                self.val = val

            def __lt__(self, other):
                # With equal counts the alphabetically later word ranks
                # lower, so it is the one evicted first.
                if self.val[0] == other.val[0]:
                    return self.val[1] > other.val[1]
                return self.val[0] < other.val[0]

            def __eq__(self, other):
                return self.val == other.val

            def __str__(self):
                return str(self.val)

        counts = collections.Counter(words)
        min_heap = []
        for word, count in counts.items():
            heapq.heappush(min_heap, MinHeapObj((count, word)))
            # Keep at most k candidates by evicting the current worst one.
            if len(min_heap) > k:
                heapq.heappop(min_heap)

        # Popping yields worst-to-best, so reverse to get best-to-worst.
        result = []
        while min_heap:
            result.append(heapq.heappop(min_heap).val[1])
        return result[::-1]
# Time: O(n + klogk) in the best case, O(n + nlogn) in the worst case (many words tied in frequency)
# Space: O(n)
# Bucket Sort Solution
class Solution3(object):
    """Bucket-sort solution: words are grouped by their occurrence count."""

    def topKFrequent(self, words, k):
        """
        Return the k most frequent words, most frequent first.

        Words with equal frequency are ordered alphabetically. A
        non-positive k yields an empty list.

        :type words: List[str]
        :type k: int
        :rtype: List[str]
        """
        counts = collections.Counter(words)
        # buckets[c] collects every word that occurs exactly c times; a
        # count can be as large as len(words), hence the extra slot.
        buckets = [[] for _ in range(len(words) + 1)]
        for word, count in counts.items():
            buckets[count].append(word)

        result = []
        # Walk from the highest possible count, len(words) itself included
        # (a word that makes up the whole input lands there), down to 1.
        for count in range(len(words), 0, -1):
            if len(result) >= k:
                break
            # Words in one bucket tie on frequency, so order them by name.
            result.extend(sorted(buckets[count]))
        return result[:k]
# time: O(nlogn)
# space: O(n)
from collections import Counter
class Solution4(object):
    """Full-sort solution (O(n log n) time, O(n) space)."""

    def topKFrequent(self, words, k):
        """
        Return the k most frequent words, most frequent first.

        Words with equal frequency are ordered alphabetically.

        :type words: List[str]
        :type k: int
        :rtype: List[str]
        """
        counter = Counter(words)
        # sorted() accepts any iterable, so this works whether keys() is a
        # list (Python 2) or a view without a sort() method (Python 3).
        candidates = sorted(counter, key=lambda w: (-counter[w], w))
        return candidates[:k]