Question: Needleman-Wunsch with Python
1
gravatar for salvatoredanilopalumbo
8 months ago by
salvatoredanilopalumbo20 wrote:

Hi, I'm implementing the Needleman-wunsch algorithm with python and I don't know how to use the open penalty gap. Also, i want to use the BLOSUM62 matrix. Can anyone help me? Thanks

import pandas as pd
import numpy as np
from Bio.SubsMat import MatrixInfo


seq1 ='ACGGTCT'  
seq2 = "ATGGCCTC"


pt ={'MATCH': +1, 'MISMATCH': -3, 'GAP': -4, 'OPENGAP':0.5}


blosum62 = MatrixInfo.blosum62




def match(a, b):
    if a == b:
        return pt['MATCH']
    elif a == '-' or b == '-':
        return pt['GAP']
    else:
        return pt['MISMATCH']






def n_w(s1, s2):
    m, n = len(s1), len(s2)
    matrix = np.zeros((m+1, n+1))

    #Initialization
    for i in range(m+1):
        matrix[i][0] = pt['GAP'] * i
    for j in range(n+1):
        matrix[0][j] = pt['GAP'] * j

    #Fill
    for i in range(1, m + 1):
        for j in range(1, n + 1):
            diag = matrix[i-1][j-1] + match(s1[i-1], s2[j-1])
            delete = matrix[i-1][j] + pt['GAP']
            insert = matrix[i][j-1] + pt['GAP']
            matrix[i][j] = max(diag, delete, insert)

    print('Matrix = \n%s\n' % matrix)
    align1, align2 = '', ''
    i,j = m,n



    #Traceback
    while i > 0 and j > 0:
        score_current = matrix[i][j]
        score_diag = matrix[i-1][j-1]
        score_left = matrix[i][j-1]
        score_up = matrix[i-1][j]

        if score_current == score_diag + match(s1[i-1], s2[j-1]):
            a1,a2 = s1[i-1],s2[j-1]
            i,j = i-1,j-1
        elif score_current == score_up + pt['GAP']:
            a1,a2 = s1[i-1],'-'
            i -= 1
        elif score_current == score_left + pt['GAP']:
            a1,a2 = '-',s2[j-1]
            j -= 1
        align1 += a1
        align2 += a2


    while i > 0:
        a1,a2 = s1[i-1],'-'
        align1 += a1
        align2 += a2
        i -= 1

    while j > 0:
        a1,a2 = '-',s2[j-1]
        align1 += a1
        align2 += a2
        j -= 1

    align1 = align1[::-1]
    align2 = align2[::-1]
    seqN = len(align1)
    sym = ''
    seq_score = 0
    identity = 0
    for i in range(seqN):
        a1 = align1[i]
        a2 = align2[i]
        if a1 == a2:
            sym += a1
            identity += 1
            seq_score += match(a1, a2)

        else: 
            seq_score += match(a1, a2)
            sym += ' '

    identity = identity/seqN * 100

    print('identityity = %2.1f percent' % identity)
    print('Score = %d\n'% seq_score)

    print(align1)
    print(sym)
    print(align2)




n_w(seq1, seq2)
ADD COMMENTlink modified 7 months ago by salihakevserkavuncu0 • written 8 months ago by salvatoredanilopalumbo20
1

How To Ask Good Questions On Technical And Scientific Forums

ADD REPLYlink written 8 months ago by karl.stamm3.6k
1

For differentiating between gap extension and opening, you need to use an affine gap penalty model. For instance, have a look here.

ADD REPLYlink written 8 months ago by cschu1812.3k

Your indentation is wrong

ADD REPLYlink written 8 months ago by Asaf8.1k
1

Ok, i fixed the code

ADD REPLYlink written 8 months ago by salvatoredanilopalumbo20

Yup, sorry but i'm newbie

ADD REPLYlink written 8 months ago by salvatoredanilopalumbo20
Please log in to add an answer.

Help
Access

Use of this site constitutes acceptance of our User Agreement and Privacy Policy.
Powered by Biostar version 2.3.0
Traffic: 1588 users visited in the last hour