Question: Python script for sequence alignment not running
0
gravatar for er.doug.ragnar
6 months ago by
er.doug.ragnar20 wrote:
def read_Fasta(filename):
    from re import sub, search
    res = []
    sequence = None
    info = None
    fh = open(filename)
    for line in fh:
        if search(">.*", line):
            if sequence is not None and info is not None and sequence != "":
                res.append(sequence)
            info = line
            sequence = ""
        else:
            if sequence is None:
                return None
            else:
                sequence += sub("\s", "", line)
    if sequence is not None and info is not None and sequence != "":
        res.append(sequence)
    fh.close()
    return res

def read_submat_file(filename):
    sm = {}
    f = open(filename, "r")
    line = f.readline()
    tokens = line.split("\t")
    ns = len(tokens)
    alphabet = []
    for i in range(0, ns):
        alphabet.append(tokens[i][0])
    for i in range(0, ns):
        line = f.readline()
        tokens = line.split("\t")
        for j in range(0, len(tokens)):
            k = alphabet[i] + alphabet[j]
            sm[k] = int(tokens[j])
    return sm

def needleman_wunsch (seq1, seq2, sm, g):
    """Global Alignment"""
    sm = read_submat_file(f)
    seq1 = read_Fasta("A.fasta")
    seq2 = read_Fasta("B.fasta")
    S = [[0]]
    T = [[0]]
    # initialize gaps in rows
    for j in range(1, len(seq2)+1):
        S[0].append(g * j)
        T[0].append(3)
    # initialize gaps in cols
    for i in range(1, len(seq1)+1):
        S.append([g * i])
        T.append([2])
    # apply the recurrence to fill the matrices
    for i in range(0, len(seq1)):
        for j in range(len(seq2)):
            s1 = S[i][j] + score_pos(seq1[i], seq2[j], sm, g)
            s2 = S[i][j+1] + g
            s3 = S[i+1][j] + g
            S[i+1].append(max(s1, s2, s3))
            T[i+1].append(max3t(s1, s2, s3))
    return (S, T)

def test_DNA_GlobalAlign():
    sm = read_submat_file("blosum62.mat")
    seq1 = read_Fasta("A.fasta")
    seq2 = read_Fasta("B.fasta")
    p = -3
    res = needleman_wunsch(seq1, seq2, sm, p)
    S = res[0]
    T = res[1]
    print("Score of optimal alignment:", S[len(seq1)][len(seq2)])
    print_mat(S)
    print_mat(T)
    alig = recover_align(T, seq1, seq2)
    print(alig[0])
    print(alig[1])

test_DNA_GlobalAlign()

I have been trying to run the python script to align 2 fasta files, it reads the files but I can't seem to use the previous functions on the subsequent functions. I get these errors.

>>> test_DNA_GlobalAlign()
Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
  File "<stdin>", line 6, in test_DNA_GlobalAlign
  File "<stdin>", line 20, in needleman_wunsch
  File "<stdin>", line 6, in score_pos
alignment python • 269 views
ADD COMMENTlink modified 6 months ago by Mensur Dlakic6.9k • written 6 months ago by er.doug.ragnar20
1
gravatar for Mensur Dlakic
6 months ago by
Mensur Dlakic6.9k
USA
Mensur Dlakic6.9k wrote:

You probably need to delete these lines from needleman_wunsch:

sm = read_submat_file(f)
seq1 = read_Fasta("A.fasta")
seq2 = read_Fasta("B.fasta")

By the way, you can achieve this task with a dozen or so lines of code in Biopython.

ADD COMMENTlink written 6 months ago by Mensur Dlakic6.9k
Please log in to add an answer.

Help
Access

Use of this site constitutes acceptance of our User Agreement and Privacy Policy.
Powered by Biostar version 2.3.0
Traffic: 848 users visited in the last hour