def find_longest_common_substring(seqs):
    """Return the longest substring shared by every sequence in *seqs*.

    The shortest sequence is used as the reference, since the answer can
    never be longer than it.  When several substrings share the maximum
    length, the one starting leftmost in the shortest sequence is returned.

    Args:
        seqs: An iterable of strings.

    Returns:
        The longest common substring, or an empty string when *seqs* is
        empty or the sequences have nothing in common.
    """
    seqs = list(seqs)
    if not seqs:
        return ""

    shortest_seq = min(seqs, key=len)
    # Sequences equal to the reference trivially contain all of its
    # substrings, so they are filtered out once instead of per candidate.
    others = [seq for seq in seqs if seq != shortest_seq]

    def first_common(length):
        """Return the leftmost substring of *length* found in all others."""
        seen = set()
        for start in range(len(shortest_seq) - length + 1):
            candidate = shortest_seq[start:start + length]
            if candidate in seen:
                continue  # already rejected this exact substring
            seen.add(candidate)
            if all(candidate in seq for seq in others):
                return candidate
        return None

    # If a common substring of length L exists, so does one of every shorter
    # length, so the maximum feasible length can be found by binary search.
    best = shortest_seq[:0]
    low, high = 1, len(shortest_seq)
    while low <= high:
        mid = (low + high) // 2
        found = first_common(mid)
        if found is None:
            high = mid - 1
        else:
            best = found
            low = mid + 1
    return best
def read_fasta(file_path):
    """Read the sequences from a FASTA file.

    Lines starting with ``>`` are record headers and mark the start of a new
    record; every other line is stripped of surrounding whitespace and
    appended to the current record's sequence.  Header text is discarded.

    Args:
        file_path: Path of the FASTA file to read.

    Returns:
        A list of sequence strings in file order.  Records without any
        sequence data are skipped, so an empty file yields an empty list.
    """
    sequences = []
    chunks = []  # pieces of the record currently being read

    def flush():
        # Store the record collected so far; empty records are dropped.
        if chunks:
            sequences.append(''.join(chunks))
            chunks.clear()

    with open(file_path, 'r') as file:
        for line in file:
            if line.startswith('>'):
                flush()  # a header closes the previous record
            else:
                piece = line.strip()
                if piece:
                    chunks.append(piece)
    flush()  # the last record has no following header
    return sequences
def main():
    """Load the Rosalind LCSM input file and print the longest shared motif."""
    print(find_longest_common_substring(read_fasta("rosalind_lcsm.txt")))


if __name__ == "__main__":
    # Run only when executed as a script, not when imported as a module.
    main()
# Rosalind: Finding a Shared Motif
# Published 2024-03-10 · 3 views
# Comments: none