Problem in Python script of NJ
0
0
Entering edit mode
18 months ago
anasjamshed ▴ 120

I am trying to implement the NJ algorithm in python :

import sys
import numpy as np
import scipy as scipy
import itertools
from sys import maxsize

def calculateQ(d):
    r = d.shape[0]
    q = np.zeros((r,r))
    for i in range(r):
        for j in range(r):
            if i == j:
                q[i][j] = 0
            else:
                sumI = 0
                sumJ = 0
                for k in range(r):
                    sumI += d[i][k]
                    sumJ += d[j][k]
                q[i][j] = (r-2) * d[i][j] - sumI - sumJ

    return q

def findLowestPair(q):
    r = q.shape[0]
    minVal = maxsize
    for i in range(0,r):
        for j in range(i,r):
            if (q[i][j] < minVal):
                minVal = q[i][j]
                minIndex = (i,j)
    return minIndex


def doDistOfPairMembersToNewNode(i,j,d):
    r = d.shape[0]
    sumI = 0
    sumJ = 0
    for k in range(r):
        sumI += d[i][k]
        sumJ += d[j][k]

    dfu = (1. / (2. * (r - 2.))) * ((r - 2.) * d[i][j] + sumI - sumJ)
    dgu = (1. / (2. * (r - 2.))) * ((r - 2.) * d[i][j] - sumI + sumJ)

    return (dfu,dgu)

def calculateNewDistanceMatrix(f,g,d):
    print (d)
    r = d.shape[0]
    nd = np.zeros((r-1,r-1))

    # Copy over the old data to this matrix
    ii = jj = 1
    for i in range(0,r):
        if i == f or i == g:
            continue
        for j in range(0,r):
            if j == f or j == g:
                continue
            nd[ii][jj] = d[i][j]
            jj += 1
        ii += 1
        jj = 1

    # Calculate the first row and column
    ii = 1
    for i in range (0,r):
        if i == f or i == g:
            continue
        nd[0][ii] = (d[f][i] + d[g][i] - d[f][g]) / 2.
        nd[ii][0] = (d[f][i] + d[g][i] - d[f][g]) / 2.
        ii += 1

    return nd

def doNeighbourJoining(d):
    labels = ["Human","Chimp","Gorilla","Orangutan","Gibbon"]

    while d.shape[0] > 1:
        q = calculateQ(d)
        lowestPair = findLowestPair(q)
        print ("Joining")
        print (lowestPair[0])
        print (lowestPair[1])
        # newlabel = "%s%s" % (labels[lowestPair[0]], labels[lowestPair[1]])
        # print "lowestPair[0]=%i\tlowestPair[1]=%i" % (lowestPair[0], lowestPair[1])
        # print labels
        # print newlabel
        # del labels[lowestPair[0]]
        # del labels[lowestPair[1]]
        # labels.insert(0,newlabel)

        i = lowestPair[0]
        j = lowestPair[1]

        pairDist = doDistOfPairMembersToNewNode(i,j,d)
        d = calculateNewDistanceMatrix(i,j,d)
        # print d


def run(distMatrix):
    doNeighbourJoining(distMatrix)

if __name__ == "__main__":
    if len(sys.argv) > 1:
        print ("Usage: neighbour-joining.py")

    distMatrix = np.array(
        [
            [  0,  79,  92,  144, 162],
            [ 79,   0,  95,  154, 169],
            [ 92,  102,   0,  150,  169],
            [ 144,  154,  150,   0,  169],
            [163, 173,  169,  169,   0],

         ]
        )

run(distMatrix)

But I am getting the following error:

---> 47     dfu = (1. / (2. * (r - 2.))) * ((r - 2.) * d[i][j] + sumI - sumJ)
     48     dgu = (1. / (2. * (r - 2.))) * ((r - 2.) * d[i][j] - sumI + sumJ)
     49 

ZeroDivisionError: float division by zero

The code gives a 5X5 matrix, then a 4x4 and 3x3 matrix as an output. But, it should output a 2x2 and 1X1 as well

Can anyone help me?

python • 309 views
ADD COMMENT

Login before adding your answer.

Traffic: 1690 users visited in the last hour
Help About
FAQ
Access RSS
API
Stats

Use of this site constitutes acceptance of our User Agreement and Privacy Policy.

Powered by the version 2.3.6