Question

Needleman-Wunsch with Python

2

Entering edit mode

4.5 years ago

salvatoredanilopalumbo ▴ 30

Hi, I'm implementing the Needleman-wunsch algorithm with python and I don't know how to use the open penalty gap. Also, i want to use the BLOSUM62 matrix. Can anyone help me? Thanks

import pandas as pd
import numpy as np
from Bio.SubsMat import MatrixInfo


seq1 ='ACGGTCT'  
seq2 = "ATGGCCTC"


pt ={'MATCH': +1, 'MISMATCH': -3, 'GAP': -4, 'OPENGAP':0.5}


blosum62 = MatrixInfo.blosum62




def match(a, b):
    if a == b:
        return pt['MATCH']
    elif a == '-' or b == '-':
        return pt['GAP']
    else:
        return pt['MISMATCH']






def n_w(s1, s2):
    m, n = len(s1), len(s2)
    matrix = np.zeros((m+1, n+1))

    #Initialization
    for i in range(m+1):
        matrix[i][0] = pt['GAP'] * i
    for j in range(n+1):
        matrix[0][j] = pt['GAP'] * j

    #Fill
    for i in range(1, m + 1):
        for j in range(1, n + 1):
            diag = matrix[i-1][j-1] + match(s1[i-1], s2[j-1])
            delete = matrix[i-1][j] + pt['GAP']
            insert = matrix[i][j-1] + pt['GAP']
            matrix[i][j] = max(diag, delete, insert)

    print('Matrix = \n%s\n' % matrix)
    align1, align2 = '', ''
    i,j = m,n



    #Traceback
    while i > 0 and j > 0:
        score_current = matrix[i][j]
        score_diag = matrix[i-1][j-1]
        score_left = matrix[i][j-1]
        score_up = matrix[i-1][j]

        if score_current == score_diag + match(s1[i-1], s2[j-1]):
            a1,a2 = s1[i-1],s2[j-1]
            i,j = i-1,j-1
        elif score_current == score_up + pt['GAP']:
            a1,a2 = s1[i-1],'-'
            i -= 1
        elif score_current == score_left + pt['GAP']:
            a1,a2 = '-',s2[j-1]
            j -= 1
        align1 += a1
        align2 += a2


    while i > 0:
        a1,a2 = s1[i-1],'-'
        align1 += a1
        align2 += a2
        i -= 1

    while j > 0:
        a1,a2 = '-',s2[j-1]
        align1 += a1
        align2 += a2
        j -= 1

    align1 = align1[::-1]
    align2 = align2[::-1]
    seqN = len(align1)
    sym = ''
    seq_score = 0
    identity = 0
    for i in range(seqN):
        a1 = align1[i]
        a2 = align2[i]
        if a1 == a2:
            sym += a1
            identity += 1
            seq_score += match(a1, a2)

        else: 
            seq_score += match(a1, a2)
            sym += ' '

    identity = identity/seqN * 100

    print('identityity = %2.1f percent' % identity)
    print('Score = %d\n'% seq_score)

    print(align1)
    print(sym)
    print(align2)




n_w(seq1, seq2)

python alignment gap gap penalty blosum62 • 5.0k views

ADD COMMENT • link written 4.5 years ago by salvatoredanilopalumbo ▴ 30

1

Entering edit mode

How To Ask Good Questions On Technical And Scientific Forums

ADD REPLY • link 4.5 years ago by karl.stamm 4.1k

1

Entering edit mode

For differentiating between gap extension and opening, you need to use an affine gap penalty model. For instance, have a look here.

ADD REPLY • link 4.5 years ago by cschu181 ★ 2.8k

0

Entering edit mode

Your indentation is wrong

ADD REPLY • link 4.5 years ago by Asaf 10k

1

Entering edit mode

Ok, i fixed the code

ADD REPLY • link 4.5 years ago by salvatoredanilopalumbo ▴ 30

0

Entering edit mode

Yup, sorry but i'm newbie

ADD REPLY • link 4.5 years ago by salvatoredanilopalumbo ▴ 30