forked from shuboc/LeetCode-2
-
Notifications
You must be signed in to change notification settings - Fork 0
/
repeated-dna-sequences.py
50 lines (43 loc) · 1.5 KB
/
repeated-dna-sequences.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
# Time: O(n)
# Space: O(n)
# All DNA is composed of a series of nucleotides abbreviated as A, C, G, and T,
# for example: "ACGAATTCCG". When studying DNA, it is sometimes useful to identify repeated sequences within the DNA.
#
# Write a function to find all the 10-letter-long sequences (substrings) that occur more than once in a DNA molecule.
#
# For example,
#
# Given s = "AAAAACCCCCAAAAACCCCCCAAAAAGGGTTT",
#
# Return:
# ["AAAAACCCCC", "CCCCCAAAAA"].
import collections
class Solution(object):
def findRepeatedDnaSequences(self, s):
"""
:type s: str
:rtype: List[str]
"""
dict, rolling_hash, res = {}, 0, []
for i in xrange(len(s)):
rolling_hash = ((rolling_hash << 3) & 0x3fffffff) | (ord(s[i]) & 7)
if rolling_hash not in dict:
dict[rolling_hash] = True
elif dict[rolling_hash]:
res.append(s[i - 9: i + 1])
dict[rolling_hash] = False
return res
def findRepeatedDnaSequences2(self, s):
"""
:type s: str
:rtype: List[str]
"""
l, r = [], []
if len(s) < 10: return []
for i in range(len(s) - 9):
l.extend([s[i:i + 10]])
return [k for k, v in collections.Counter(l).items() if v > 1]
if __name__ == "__main__":
print Solution().findRepeatedDnaSequences("AAAAAAAAAA")
print Solution().findRepeatedDnaSequences("")
print Solution().findRepeatedDnaSequences("AAAAACCCCCAAAAACCCCCCAAAAAGGGTTT")