Rosalind Finding a Shared Motif

发布于 2024-03-10  3 次阅读


def find_longest_common_substring(seqs):
    """Return the longest substring shared by every sequence in *seqs*.

    Candidates are taken from the shortest sequence, longest first and left
    to right, so when several common substrings tie for the maximum length
    the one that starts earliest in the shortest sequence is returned.

    Args:
        seqs: Iterable of strings to compare.

    Returns:
        The longest common substring, or an empty string when *seqs* is
        empty or the sequences have no substring in common.
    """
    seqs = list(seqs)  # several passes are needed, so accept one-shot iterables
    if not seqs:
        return ""

    # A common substring can never be longer than the shortest sequence, so
    # that sequence is the only source of candidates we need.
    shortest_seq = min(seqs, key=len)

    # Sequences equal to the reference trivially contain every candidate;
    # filter them out once instead of re-comparing for each candidate.
    others = [seq for seq in seqs if seq != shortest_seq]

    # Try the longest candidates first so the first hit is the answer.
    for length in range(len(shortest_seq), 0, -1):
        for start in range(len(shortest_seq) - length + 1):
            substr = shortest_seq[start:start + length]
            if all(substr in seq for seq in others):
                return substr

    # Not even a single character is shared by all sequences.
    return ""

def read_fasta(file_path):
    """Read the sequences stored in a FASTA file.

    A line starting with ``>`` begins a new record; the header text itself is
    discarded. Every other line is stripped of surrounding whitespace and
    appended to the current record's sequence.

    Args:
        file_path: Path of the FASTA file to read.

    Returns:
        A list of sequence strings in file order. Records that contain no
        sequence data are omitted, so an empty file yields an empty list.
    """
    sequences = []
    chunks = []  # pieces of the record currently being read
    with open(file_path, 'r') as file:
        for line in file:
            if line.startswith('>'):
                # A header closes the previous record, if it had any data.
                if chunks:
                    sequences.append(''.join(chunks))
                    chunks = []
            else:
                piece = line.strip()
                if piece:
                    chunks.append(piece)
    # Flush the last record, but only when it actually holds data; otherwise
    # an empty file or a trailing bare header would add a spurious ''.
    if chunks:
        sequences.append(''.join(chunks))
    return sequences

def main(file_path="rosalind_lcsm.txt"):
    """Print the longest motif shared by all sequences in a FASTA file.

    Args:
        file_path: Path of the FASTA file to read. Defaults to
            ``rosalind_lcsm.txt``, resolved against the current working
            directory.
    """
    sequences = read_fasta(file_path)  # load the DNA sequences
    result = find_longest_common_substring(sequences)  # longest shared motif
    print(result)

# Run the solver only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
最后更新于 2024-03-10