所有 DNA 都由一系列缩写为 'A'、'C'、'G' 和 'T' 的核苷酸组成,例如:"ACGAATTCCG"。在研究 DNA 时,识别 DNA 中的重复序列有时会对研究非常有帮助。
编写一个函数来找出所有目标子串,目标子串的长度为 10,且在 DNA 字符串 s 中出现次数超过一次。
示例 1:
输入:s = "AAAAACCCCCAAAAACCCCCCAAAAAGGGTTT" 输出:["AAAAACCCCC","CCCCCAAAAA"]
示例 2:
输入:s = "AAAAAAAAAAAAA" 输出:["AAAAAAAAAA"]
提示:
0 <= s.length <= 10^5
s[i] 为 'A'、'C'、'G' 或 'T'
class Solution:
    def findRepeatedDnaSequences(self, s: str) -> List[str]:
        """Return every 10-character substring of ``s`` that occurs more than once.

        Args:
            s: The DNA string to scan.

        Returns:
            Each repeated 10-character substring exactly once, listed in the
            order in which its second occurrence is encountered while scanning
            ``s`` from left to right. An empty list is returned when ``s`` has
            fewer than 10 characters or contains no repeats.
        """
        window = 10
        seen = set()
        # A dict is used as an insertion-ordered set so the returned order is
        # deterministic instead of depending on string hash randomization.
        repeated = {}
        # range() is empty when len(s) < window, so short inputs fall through.
        for start in range(len(s) - window + 1):
            fragment = s[start:start + window]
            if fragment in seen:
                repeated[fragment] = None
            else:
                seen.add(fragment)
        return list(repeated)
class Solution {
public List<String> findRepeatedDnaSequences(String s) {
int len = 10;
Set<String> subs = new HashSet<>();
Set<String> res = new HashSet<>();
for (int i = 0; i < s.length() - len + 1; ++i) {
String sub = s.substring(i, i + len);
if (subs.contains(sub)) {
res.add(sub);
}
subs.add(sub);
}
return new ArrayList<>(res);
}
}
/**
 * Finds every 10-character substring of `s` that occurs more than once.
 * Each repeated substring appears once in the result, in the order in which
 * its second occurrence is met while scanning from left to right.
 *
 * @param {string} s
 * @return {string[]}
 */
var findRepeatedDnaSequences = function(s) {
    const WINDOW = 10;
    const seen = new Set();
    const repeated = new Set();
    let start = 0;
    // Stop as soon as a full window no longer fits; short inputs skip the loop.
    while (start + WINDOW <= s.length) {
        const fragment = s.substring(start, start + WINDOW);
        if (seen.has(fragment)) {
            repeated.add(fragment);
        } else {
            seen.add(fragment);
        }
        start += 1;
    }
    return Array.from(repeated);
};