Question: Needleman-Wunsch with Python
1
gravatar for salvatoredanilopalumbo
12 months ago by
salvatoredanilopalumbo20 wrote:

Hi, I'm implementing the Needleman-wunsch algorithm with python and I don't know how to use the open penalty gap. Also, i want to use the BLOSUM62 matrix. Can anyone help me? Thanks

import pandas as pd
import numpy as np
from Bio.SubsMat import MatrixInfo


seq1 ='ACGGTCT'  
seq2 = "ATGGCCTC"


pt ={'MATCH': +1, 'MISMATCH': -3, 'GAP': -4, 'OPENGAP':0.5}


blosum62 = MatrixInfo.blosum62




def match(a, b):
    if a == b:
        return pt['MATCH']
    elif a == '-' or b == '-':
        return pt['GAP']
    else:
        return pt['MISMATCH']






def n_w(s1, s2):
    m, n = len(s1), len(s2)
    matrix = np.zeros((m+1, n+1))

    #Initialization
    for i in range(m+1):
        matrix[i][0] = pt['GAP'] * i
    for j in range(n+1):
        matrix[0][j] = pt['GAP'] * j

    #Fill
    for i in range(1, m + 1):
        for j in range(1, n + 1):
            diag = matrix[i-1][j-1] + match(s1[i-1], s2[j-1])
            delete = matrix[i-1][j] + pt['GAP']
            insert = matrix[i][j-1] + pt['GAP']
            matrix[i][j] = max(diag, delete, insert)

    print('Matrix = \n%s\n' % matrix)
    align1, align2 = '', ''
    i,j = m,n



    #Traceback
    while i > 0 and j > 0:
        score_current = matrix[i][j]
        score_diag = matrix[i-1][j-1]
        score_left = matrix[i][j-1]
        score_up = matrix[i-1][j]

        if score_current == score_diag + match(s1[i-1], s2[j-1]):
            a1,a2 = s1[i-1],s2[j-1]
            i,j = i-1,j-1
        elif score_current == score_up + pt['GAP']:
            a1,a2 = s1[i-1],'-'
            i -= 1
        elif score_current == score_left + pt['GAP']:
            a1,a2 = '-',s2[j-1]
            j -= 1
        align1 += a1
        align2 += a2


    while i > 0:
        a1,a2 = s1[i-1],'-'
        align1 += a1
        align2 += a2
        i -= 1

    while j > 0:
        a1,a2 = '-',s2[j-1]
        align1 += a1
        align2 += a2
        j -= 1

    align1 = align1[::-1]
    align2 = align2[::-1]
    seqN = len(align1)
    sym = ''
    seq_score = 0
    identity = 0
    for i in range(seqN):
        a1 = align1[i]
        a2 = align2[i]
        if a1 == a2:
            sym += a1
            identity += 1
            seq_score += match(a1, a2)

        else: 
            seq_score += match(a1, a2)
            sym += ' '

    identity = identity/seqN * 100

    print('identityity = %2.1f percent' % identity)
    print('Score = %d\n'% seq_score)

    print(align1)
    print(sym)
    print(align2)




n_w(seq1, seq2)
ADD COMMENTlink modified 11 months ago by salihakevserkavuncu0 • written 12 months ago by salvatoredanilopalumbo20
1

How To Ask Good Questions On Technical And Scientific Forums

ADD REPLYlink written 12 months ago by karl.stamm3.8k
1

For differentiating between gap extension and opening, you need to use an affine gap penalty model. For instance, have a look here.

ADD REPLYlink written 12 months ago by cschu1812.5k

Your indentation is wrong

ADD REPLYlink written 12 months ago by Asaf8.4k
1

Ok, i fixed the code

ADD REPLYlink written 12 months ago by salvatoredanilopalumbo20

Yup, sorry but i'm newbie

ADD REPLYlink written 12 months ago by salvatoredanilopalumbo20
Please log in to add an answer.

Help
Access

Use of this site constitutes acceptance of our User Agreement and Privacy Policy.
Powered by Biostar version 2.3.0
Traffic: 831 users visited in the last hour