Question: Needleman-Wunsch with Python
0
gravatar for salvatoredanilopalumbo
4 months ago by

Hi, I'm implementing the Needleman-wunsch algorithm with python and I don't know how to use the open penalty gap. Also, i want to use the BLOSUM62 matrix. Can anyone help me? Thanks

import pandas as pd
import numpy as np
from Bio.SubsMat import MatrixInfo


seq1 ='ACGGTCT'  
seq2 = "ATGGCCTC"


pt ={'MATCH': +1, 'MISMATCH': -3, 'GAP': -4, 'OPENGAP':0.5}


blosum62 = MatrixInfo.blosum62




def match(a, b):
    if a == b:
        return pt['MATCH']
    elif a == '-' or b == '-':
        return pt['GAP']
    else:
        return pt['MISMATCH']






def n_w(s1, s2):
    m, n = len(s1), len(s2)
    matrix = np.zeros((m+1, n+1))

    #Initialization
    for i in range(m+1):
        matrix[i][0] = pt['GAP'] * i
    for j in range(n+1):
        matrix[0][j] = pt['GAP'] * j

    #Fill
    for i in range(1, m + 1):
        for j in range(1, n + 1):
            diag = matrix[i-1][j-1] + match(s1[i-1], s2[j-1])
            delete = matrix[i-1][j] + pt['GAP']
            insert = matrix[i][j-1] + pt['GAP']
            matrix[i][j] = max(diag, delete, insert)

    print('Matrix = \n%s\n' % matrix)
    align1, align2 = '', ''
    i,j = m,n



    #Traceback
    while i > 0 and j > 0:
        score_current = matrix[i][j]
        score_diag = matrix[i-1][j-1]
        score_left = matrix[i][j-1]
        score_up = matrix[i-1][j]

        if score_current == score_diag + match(s1[i-1], s2[j-1]):
            a1,a2 = s1[i-1],s2[j-1]
            i,j = i-1,j-1
        elif score_current == score_up + pt['GAP']:
            a1,a2 = s1[i-1],'-'
            i -= 1
        elif score_current == score_left + pt['GAP']:
            a1,a2 = '-',s2[j-1]
            j -= 1
        align1 += a1
        align2 += a2


    while i > 0:
        a1,a2 = s1[i-1],'-'
        align1 += a1
        align2 += a2
        i -= 1

    while j > 0:
        a1,a2 = '-',s2[j-1]
        align1 += a1
        align2 += a2
        j -= 1

    align1 = align1[::-1]
    align2 = align2[::-1]
    seqN = len(align1)
    sym = ''
    seq_score = 0
    identity = 0
    for i in range(seqN):
        a1 = align1[i]
        a2 = align2[i]
        if a1 == a2:
            sym += a1
            identity += 1
            seq_score += match(a1, a2)

        else: 
            seq_score += match(a1, a2)
            sym += ' '

    identity = identity/seqN * 100

    print('identityity = %2.1f percent' % identity)
    print('Score = %d\n'% seq_score)

    print(align1)
    print(sym)
    print(align2)




n_w(seq1, seq2)
ADD COMMENTlink modified 3 months ago by salihakevserkavuncu0 • written 4 months ago by salvatoredanilopalumbo0
1

How To Ask Good Questions On Technical And Scientific Forums

ADD REPLYlink written 4 months ago by karl.stamm3.6k

Your indentation is wrong

ADD REPLYlink written 4 months ago by Asaf7.0k

Yup, sorry but i'm newbie

ADD REPLYlink written 4 months ago by salvatoredanilopalumbo0

Ok, i fixed the code

ADD REPLYlink written 4 months ago by salvatoredanilopalumbo0

For differentiating between gap extension and opening, you need to use an affine gap penalty model. For instance, have a look here.

ADD REPLYlink written 4 months ago by cschu1812.1k
Please log in to add an answer.

Help
Access

Use of this site constitutes acceptance of our User Agreement and Privacy Policy.
Powered by Biostar version 2.3.0
Traffic: 1278 users visited in the last hour