How to collapse leaves with the same name in ETE3 tree?
1
0
Entering edit mode
2.8 years ago

I'm trying to collapse leaves that have the same name in an ETE3 tree. I think I've almost got it: the two leaves in my example (A. thaliana) are combined into a single leaf, but that leaf has no name. I would like it to have the same name as the leaves it replaces.

from ete3 import Tree

def collapsed_leaf(node):
    """Decide whether ``node`` should be written out as a leaf.

    Meant to be passed as ``is_leaf_fn`` to ``Tree.write``. It looks
    ``node`` up in the module-level ``node2labels`` mapping, which has to be
    assigned (via ``get_cached_content(store_attr="name")``) before the
    tree is written.

    Returns:
        True when exactly one distinct label is cached for ``node``,
        False otherwise.
    """
    # The comparison is already a bool, so no if/else is needed.
    return len(node2labels[node]) == 1


# Example tree: the first clade holds two leaves that share the name "A. thaliana".
t1 = Tree('(("A. thaliana":0.723274,"A. thaliana":0.567784):0.067192,("B":0.279326,"H":0.756049):0.807788);', quoted_node_names=True)
# Module-level cache read by collapsed_leaf(); store_attr="name" caches names per node.
node2labels = t1.get_cached_content(store_attr="name")
print(t1)
#       /-A. thaliana
#    /-|
#   |   \-A. thaliana
# --|
#   |   /-B
#    \-|
#       \-H

# Serialise t1 using collapsed_leaf as the leaf test, then parse the result into a new tree.
# As the output below shows, the collapsed clade is printed without a label.
t2 = Tree(t1.write(is_leaf_fn=collapsed_leaf, quoted_node_names=True), quoted_node_names=True)
print(t2)
#    /-
# --|
#   |   /-B
#    \-|
#       \-H
python ete3 • 1.7k views
ADD COMMENT
2
Entering edit mode
2.8 years ago
Kevin ▴ 20

Hi Andrzej,

I was looking for something similar and ended up developing the following code.

def name_internal_nodes(T):
    """Give each internal node the name shared by all leaves below it.

    Internal nodes whose leaves carry more than one distinct name are left
    unchanged.

    Args:
        T: tree to label; its nodes must offer ``traverse()``, ``is_leaf()``,
            ``iter_leaves()`` and a ``name`` attribute.

    The tree is modified in place and nothing is returned.
    """
    for node in T.traverse():
        if node.is_leaf():
            continue
        # A set drops duplicates, so one element means every leaf agrees.
        leaf_names = {leaf.name for leaf in node.iter_leaves()}
        if len(leaf_names) == 1:
            node.name = next(iter(leaf_names))

Basically, this function gives an internal node a name if all the leaves below that node have the same name. Then, collapsed_leaf will keep the internal node's name together with its branch length.

On your example, it gives the following:

# Same example tree as in the question.
t1 = Tree('(("A. thaliana":0.723274,"A. thaliana":0.567784):0.067192,("B":0.279326,"H":0.756049):0.807788);', quoted_node_names=True)
# show_internal=True also prints the names of internal nodes (none are set yet).
print(t1.get_ascii(show_internal=True))
#      /-A. thaliana
#   /-|
#  |   \-A. thaliana
#--|
#  |   /-B
#   \-|
#      \-H
# Label internal nodes in place; the "A. thaliana" clade now carries that name.
name_internal_nodes(t1)
print(t1.get_ascii(show_internal=True))
#              /-A. thaliana
#   /A. thaliana
#  |           \-A. thaliana
#--|
#  |   /-B
#   \-|
#      \-H
# Cache node content (module-level name, read by collapsed_leaf)
node2labels = t1.get_cached_content(store_attr="name")
# Collapse tree: write with the custom leaf test, then re-parse the result
t2 = Tree( t1.write(is_leaf_fn=collapsed_leaf, quoted_node_names=True), quoted_node_names=True)
print(t2.get_ascii(show_internal=True))
#   /-A. thaliana
#--|
#  |   /-B
#   \-|
#      \-H
# Newick text of the collapsed tree
print (t2.write())
#(A. thaliana:0.067192,(B:0.279326,H:0.756049)1:0.807788);
ADD COMMENT
0
Entering edit mode

Thank you, Kevin. This is exactly what I was looking for. You saved my life :)

ADD REPLY
0
Entering edit mode

Hi, I have tried to adapt this to collapse a tree based on the first string in leaf labels (being a genus name), but it is not being collapsed or renamed. Can you suggest what the problem might be?

Thanks for any help.

test.tree = (("Arabidopsis_thaliana1":0.723274,"Arabidopsis_thaliana2":0.567784):0.067192,("B":0.279326,"H":0.756049):0.807788);
import ete3
import sys

def name_internal_nodes(T):
    """Name internal nodes after the prefix shared by all of their leaves.

    The prefix is the part of each leaf name before the first underscore
    (the genus, for names like ``Genus_species``). Leaf names themselves are
    not modified. For every internal node the list of distinct prefixes is
    printed.

    Returns:
        The same tree object ``T``.
    """
    for node in T.traverse():
        if node.is_leaf():
            continue
        # Distinct prefixes among the leaves under this node.
        names_unique = list({leaf.name.split("_")[0] for leaf in node.iter_leaves()})
        print(names_unique)
        if len(names_unique) != 1:
            continue
        # Exactly one prefix: use it as this node's name.
        node.name = names_unique[0]
    return T

def collapsed_leaf(node):
    """``is_leaf_fn`` callback: True when ``node`` has exactly one cached label.

    Reads the module-level ``node2labels`` mapping, which must already be
    assigned when the tree is written.
    """
    labels = node2labels[node]
    return len(labels) == 1

#global
# argv[1] is handed to ete3.Tree as the input tree; argv[2] is the output path.
t1 = ete3.Tree(sys.argv[1], quoted_node_names=True)
print(t1.get_ascii(show_internal=True))
t2 = name_internal_nodes(t1)
print(t2.get_ascii(show_internal=True))
# Must be a module-level name because collapsed_leaf() reads it. It is built
# once, after the internal nodes have been named; an earlier cache would be
# overwritten here anyway.
node2labels = t2.get_cached_content(store_attr="name")
# Write with the custom leaf test, then re-parse the Newick into a new tree.
t3 = ete3.Tree(t2.write(is_leaf_fn=collapsed_leaf, quoted_node_names=True), quoted_node_names=True)
print(t3.get_ascii(show_internal=True))
# Open the output only once there is something to write, and let the context
# manager close (and flush) the file even if the write fails.
with open(sys.argv[2], 'w') as g:
    g.write(str(t3.write()))
ADD REPLY
0
Entering edit mode

OK, I think I solved it — I also had to rename the leaves:

#!/usr/bin/env python3


import ete3
import sys

def name_internal_nodes(T):
    """Shorten leaf names to their prefix and name uniform internal nodes.

    For every internal node, each leaf below it is renamed to the part of
    its name before the first underscore (the genus, for names like
    ``Genus_species``). If all of those leaves end up with the same name,
    the internal node receives that name too. The list of distinct names
    found under each internal node is printed.

    Args:
        T: tree to process; its nodes must offer ``traverse()``,
            ``is_leaf()``, ``iter_leaves()`` and a ``name`` attribute.

    Returns:
        The same tree object ``T``, modified in place.
    """
    for node in T.traverse():
        if node.is_leaf():
            continue

        genera = set()
        for leaf in node.iter_leaves():
            # Rename and collect in one pass, so no index has to be kept in
            # step with a second iteration over the leaves.
            leaf.name = leaf.name.split("_")[0]
            genera.add(leaf.name)

        names_unique = list(genera)
        print(names_unique)
        # if all leaves have the same name, give that name to the node
        if len(names_unique) == 1:
            node.name = names_unique[0]

    return T

def collapsed_leaf(node):
    """Report whether ``node`` should be treated as a leaf when writing.

    Used as the ``is_leaf_fn`` argument of ``write``; relies on the
    module-level ``node2labels`` mapping being assigned beforehand.
    """
    distinct_names = len(node2labels[node])
    return distinct_names == 1

#global
# argv[1] is handed to ete3.Tree as the input tree; argv[2] is the output path.
t1 = ete3.Tree(sys.argv[1], quoted_node_names=True)
print(t1.get_ascii(show_internal=True))
# Renames the leaves and names the uniform internal nodes (in place).
t2 = name_internal_nodes(t1)
print(t2.get_ascii(show_internal=True))
# Must be a module-level name because collapsed_leaf() reads it. It is built
# only after the renaming so that it reflects the shortened leaf names.
node2labels = t2.get_cached_content(store_attr="name")
# Write with the custom leaf test, then re-parse the Newick into a new tree.
t3 = ete3.Tree(t2.write(is_leaf_fn=collapsed_leaf, quoted_node_names=True), quoted_node_names=True)
print(t3.get_ascii(show_internal=True))
# Open the output only once there is something to write, and let the context
# manager close (and flush) the file even if the write fails.
with open(sys.argv[2], 'w') as g:
    g.write(str(t3.write()))
ADD REPLY

Login before adding your answer.

Traffic: 2517 users visited in the last hour
Help About
FAQ
Access RSS
API
Stats

Use of this site constitutes acceptance of our User Agreement and Privacy Policy.

Powered by the version 2.3.6