Logo Search packages:      
Sourcecode: kalign version File versions  Download package

kalign2_tree.c

/*
      kalign2_tree.c 
      
      Released under GPL - see the 'COPYING' file   
      
      Copyright (C) 2006 Timo Lassmann <timolassmann@gmail.com>
      
      This program is free software; you can redistribute it and/or modify
      it under the terms of the GNU General Public License as published by
      the Free Software Foundation; either version 2 of the License, or
      any later version.

      This program is distributed in the hope that it will be useful,
      but WITHOUT ANY WARRANTY; without even the implied warranty of
      MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      GNU General Public License for more details.

      You should have received a copy of the GNU General Public License
      along with this program; if not, write to the Free Software
      Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
    
      Please send bug reports, comments etc. to:
      timolassmann@gmail.com
*/

#include "kalign2.h"

struct aln_tree_node* real_upgma(float **dm,int ntree)
{
      int i,j;
      int *as = 0;
      float max;
      int node_a = 0;
      int node_b = 0;
      int cnode = numseq;
      
      struct aln_tree_node** tree = 0;
      struct aln_tree_node* tmp = 0;
      
      as = malloc(sizeof(int)*numseq);
      for (i = numseq; i--;){
            as[i] = i+1;
      }
      
      tree = malloc(sizeof(struct aln_tree_node*)*numseq);
      for (i=0;i < numseq;i++){
            tree[i] = malloc(sizeof(struct aln_tree_node));
            tree[i]->done = 1;
            tree[i]->num = i;
            tree[i]->path = 0;
            tree[i]->profile = 0;
            tree[i]->seq = 0;//seq[i];
            tree[i]->len = 0;//len[i]; 
            /*
            Needs to be +2 because:
            at n = 3 is is possible to get a perfectly balanced binary tree with 4 sequences at intermediate nodes
            */
            /*tree[i]->links = malloc(sizeof(struct aln_tree_node*)*2);
            
            for ( j =0;j < 2;j++){
                  tree[i]->links[j] = 0;
            }*/
            
            tree[i]->internal_lables = malloc(sizeof(int)*(ntree+(ntree-1)));
            tree[i]->links = malloc(sizeof(struct aln_tree_node*)*(ntree+(ntree-1)));
            
            for ( j =0;j < (ntree+(ntree-1));j++){
                  tree[i]->links[j] = 0;
                  tree[i]->internal_lables[j] = 0;
            }
      }
      
      while (cnode != numprofiles){
            max = -INFTY;
            for (i = 0;i < numseq-1; i++){
                  if (as[i]){
                  for ( j = i + 1;j < numseq;j++){
                        if (as[j]){
                        if (dm[i][j] > max){
                              max = dm[i][j];
                              node_a = i;
                              node_b = j;
                        }
                        }
                  }
                  }
            }
            tmp = malloc(sizeof(struct aln_tree_node));
            tmp->done = 0;
            tmp->path = 0;
            tmp->profile = 0;
            tmp->num = cnode;
            tmp->seq = 0;
            tmp->len = 0;
            
            tmp->links = malloc(sizeof(struct aln_tree_node*)*(ntree+(ntree-1)));
            tmp->internal_lables = malloc(sizeof(int)*(ntree+(ntree-1)));
            tmp->links[0] = tree[node_a];
            tmp->links[1] = tree[node_b];
            tmp->internal_lables[0] = cnode;
            tmp->internal_lables[1] = 0;
            
            for ( i =2;i < (ntree+(ntree-1));i++){
                  tmp->links[i] = 0;
                  tmp->internal_lables[i] = 0;
                  
            }
            
            
            tree[node_a] = tmp;
            tree[node_b] = 0;
                        
            /*deactivate  sequences to be joined*/
            as[node_a] = cnode+1;
            as[node_b] = 0;
            cnode++;    
            
            /*calculate new distances*/
            for (j = numseq;j--;){
                  if (j != node_b){
                        dm[node_a][j] = (dm[node_a][j] + dm[node_b][j])*0.5;
                  }
            }
            dm[node_a][node_a] = 0.0f;
            for (j = numseq;j--;){
                  dm[j][node_a] = dm[node_a][j];
                  dm[j][node_b] = 0.0f;
                  dm[node_b][j] = 0.0f;
            }           
      }
      tmp = tree[node_a];
      
      for (i = numseq;i--;){
            free(dm[i]);
      }
      free(dm);
      
      
      free(tree);
      free(as);
      return tmp;
}

struct aln_tree_node* real_nj(float **dm,int ntree)
{
      int i,j;
      //float **dm = 0;
      float *r = 0;
      float *r_div = 0;
      int *active = 0;
      int node = 0;
      float min = 0;
      int join_a = 0;
      int join_b = 0;
      int leaves = 0;
      
      struct aln_tree_node** tree = 0;
      struct aln_tree_node* tmp = 0;

      leaves = numseq;
      
      r = malloc ((numseq*2-1) *sizeof(float));
      r_div = malloc ((numseq*2-1) *sizeof(float));
      active = malloc((numseq*2-1)*sizeof(int));
      for ( i = 0;i < numseq*2-1;i++){
            active[i] = 0;
      }
      for ( i = 0;i < numseq;i++){
            active[i] = 1;
      }
      
      
      tree = malloc(sizeof(struct aln_tree_node*)*(numseq*2-1));
      for (i=0;i < numseq*2-1;i++){
            tree[i] = malloc(sizeof(struct aln_tree_node));
            tree[i]->done = 1;
            tree[i]->num = i;
            tree[i]->path = 0;
            tree[i]->profile = 0;
            tree[i]->seq = 0;//seq[i];
            tree[i]->len = 0;//len[i];    
            tree[i]->internal_lables = malloc(sizeof(int)*(ntree+(ntree-1)));
            tree[i]->links = malloc(sizeof(struct aln_tree_node*)*(ntree+(ntree-1)));
            
            for ( j =0;j < (ntree+(ntree-1));j++){
                  tree[i]->links[j] = 0;
                  tree[i]->internal_lables[j] = 0;
            }
      }
      
      node = numseq;
      while (node != numseq*2 -1){
            for (i = 0;i<numseq*2-1;i++){
                  if (active[i]){
                        r[i] = 0;
                        for (j = 0;j < numseq*2-1;j++){
                              if (active[j]){
                                    r[i] += (i<j) ?dm[i][j]:dm[j][i];
                              }
                        }
                        r_div[i] = r[i] / (leaves-2);
                  }
            }
            for ( j = 0;j < numseq*2-1;j++){
                  if (active[j]){
                  for ( i = j+1;i < numseq*2-1;i++){
                        if (active[i]){
                        dm[i][j] = dm[j][i] - (r[i] + r[j])/2;
                        }
                  }
                  }
            }
            min = -INFTY;
            for ( j = 0;j < numseq*2-1;j++){
                  if (active[j]){
                  for ( i = j+1;i < numseq*2-1;i++){
                        if (active[i]){
                              if (dm[i][j] > min){
                                    min = dm[i][j];
                                    join_a = j;
                                    join_b = i;
                              }
                        }
                  }
                  }
            }
            //join_a always smaller than join_b && both smaller than node
            dm[join_a][node] =  dm[join_a][join_b]/2 + (r_div[join_a] - r_div[join_b])/2;
            dm[join_b][node] =  dm[join_a][join_b] - dm[join_a][node];
            
            tree[node]->num = node;
            tree[node]->links[0] = tree[join_a];
            tree[node]->links[1] = tree[join_b];
            tree[node]->internal_lables[0] = node;
            tree[node]->internal_lables[1] = 0;


            active[join_a] = 0;
            active[join_b] = 0;
            
            for (i = 0;i<numseq*2-1;i++){
                  if (active[i]){
                        dm[i][node] = (i>join_a) ? dm[join_a][i]: dm[i][join_a];
                        dm[i][node] -= dm[join_a][node];
                        dm[i][node] += (i > join_b) ? dm[join_b][i] : dm[i][join_b] ;
                        dm[i][node] -= dm[join_b][node];
                        dm[i][node] /= 2;
                  }
            }
            active[node] = 1;
            node++;
      }

      for (i = numprofiles;i--;){
            free(dm[i]);
      }
      free(dm);

      free(r);
      free(r_div);
      free(active);
      tmp = tree[node-1];
      free(tree);
      return tmp;
}

struct ntree_data* alignntree(struct ntree_data* ntree_data,struct aln_tree_node* p)
{
      int i = 0;
      int ntree = ntree_data->ntree;
      int* leaves = 0;
      
      leaves = malloc(sizeof(int)* (ntree+(ntree-1)));

      while(p->links[i]){
            alignntree(ntree_data,p->links[i]);
            i++;  
      }
      i = 0;
      if (p->links[i]){
            fprintf(stderr,"Aligning subtree: at node:%d\n",p->num);
            while(p->links[i]){
                  leaves[i] = p->links[i]->num;
                  i++;
            }
            leaves[i] = -1;
      //    fprintf(stderr,"NODES:%d\n",i);
            ntree_data =  find_best_topology(ntree_data,leaves,p->internal_lables);
      //    exit(0);
      }
      free(leaves);
      
      return ntree_data;
}


void print_simple_phylip_tree(struct aln_tree_node* p)
{
      if(p->links[0]){
      
            fprintf(stderr,"(");
            print_simple_phylip_tree(p->links[0]);
      }
      if(p->num < numseq){
            fprintf(stderr,"%d",p->num);
      }else{
            fprintf(stderr,",");
      }
      if(p->links[1]){
            print_simple_phylip_tree(p->links[1]);
            fprintf(stderr,")");
      }
}


void printtree(struct aln_tree_node* p)
{
      int i = 0;

      while(p->links[i]){
            printtree(p->links[i]);
            i++;  
      }
      i = 0;
      if (p->links[i]){
            printf("Aligning: at node:%d\n",p->num);
            while(p->links[i]){
                  printf("%d\n",p->links[i]->num);
                  i++;
            }
            i = 0;
            while(p->internal_lables[i]){
                  printf("%d ",p->internal_lables[i]);
                  i++;
            }
            printf("\n");
      }
}

void ntreeify(struct aln_tree_node* p,int ntree)
{
      int i = 0;
      int c = 0;
      struct aln_tree_node* tmp1 = 0;
      struct aln_tree_node* tmp2 = 0;
      if (p->links[0]){
            ntreeify(p->links[0],ntree);
      }
      if (p->links[1]){
            ntreeify(p->links[1],ntree);
      }
      
      if (!p->done){
            tmp1 = p->links[0];
            tmp2 = p->links[1];
            
            p->done = tmp1->done + tmp2->done;
            i = 0;
            c = 0;
            if(tmp1->done != 1){

                  while(tmp1->internal_lables[i]){
                        p->internal_lables[c] = tmp1->internal_lables[i];
                        i++;
                        c++;
                  }
                  if(tmp2->done != 1){
                        i = 0;
                        while(tmp2->internal_lables[i]){
                              p->internal_lables[c] = tmp2->internal_lables[i];
                              c++;
                              i++;
                        }
                  }
            }else if(tmp2->done != 1){
                  i = 0;
                  while(tmp2->internal_lables[i]){
                        p->internal_lables[c] = tmp2->internal_lables[i];
                        c++;
                        i++;
                  }
            }
            p->internal_lables[c] = p->num;
            
            //fprintf(stderr,"%d:%d %d:%d       %d\n",tmp1->num,tmp1->internal_lables[0],tmp2->num,tmp2->internal_lables[0],p->num);
            /*for (i = 0; i< c;i++){
                  fprintf(stderr,"il:%d ",p->internal_lables[i]);
            }
            fprintf(stderr,"\n");*/
      
            
            if (tmp1->done > 1){
                  for ( i = 0;i < tmp1->done;i++){
                        p->links[i] = tmp1->links[i];
                        tmp1->links[i] = 0;                       
                  }
            }
            
            if (tmp2->done > 1){
                  for ( i = 0; i < tmp2->done;i++){
                        p->links[tmp1->done+i] = tmp2->links[i];
                        tmp2->links[i] = 0;
                  }
                  free(tmp2->internal_lables);
                  free(tmp2->links);
                  free(tmp2);
            }else{
                  p->links[tmp1->done] = tmp2;
            }
      //    fprintf(stderr,"p->num:%d\n",p->num);
            p->links[p->done] = 0;
            
            if (tmp1->done > 1){
                  free(tmp1->internal_lables);
                  free(tmp1->links);
                  free(tmp1);
            }
            
            if (p->done >= ntree){ 
                  p->done = 1;
                  /*i = 0;
                  while(p->internal_lables[i]){
                        i++;
                  }
                  p->internal_lables[i] = p->num;*/
            }
      }
}

struct ntree_data* find_best_topology(struct ntree_data* ntree_data,int* leaves,int* nodes)
{
      int i,c;
      int elements = 0;
      //int num_topologies =0;
      int* milometer = 0; //DURBIN
      struct tree_node* tree = 0;
      struct tree_node* tmp = 0;
      int newnode = 0;
      int local_ntree = 0;
      
      int *tmp_tree = 0;

      while(leaves[local_ntree] != -1){
            local_ntree++;
      }
      //fprintf(stderr,"REALKDASF KJAF SA:%d\n",local_ntree);

      //for (i = 0; i < local_ntree-1;i++){
      //    fprintf(stderr,"nodes:%d\n",nodes[i]);
      //}
      

      tmp_tree = malloc(sizeof(int)*(local_ntree+local_ntree-1)*3);
      for (c = 0; c < (local_ntree+local_ntree-1)*3;c++){
            tmp_tree[c] = 0;
      }
      
      tmp_tree[0] =1;
      
      
      if (local_ntree < 3){
            //printf("ORDER1: %d    and   %d\n",leaves[0],leaves[1]);
            tmp_tree[0] =1;
      
            tmp = malloc(sizeof(struct tree_node));
            tmp->left = 0;
            tmp->right = 0;
            tmp->label = -1;
            tmp->edge = 0;
      
            tmp->left = malloc(sizeof(struct tree_node));
            tmp->left->left = 0;
            tmp->left->right = 0;
            tmp->left->edge = 1;
            tmp->left->label = leaves[0];
            tmp->right = malloc(sizeof(struct tree_node));
            tmp->right->left = 0;
            tmp->right->right = 0;
            tmp->right->edge = 2;
            tmp->right->label = leaves[1];
            tree = malloc(sizeof(struct tree_node));
            tree->left =tmp;
            tree->right = 0;
            tree->edge = -1;
            tree->label = -1;

            c =  add_label_simpletree(tree,nodes,0);
            readsimpletree(tree,tmp_tree);
            /*for (c = 1; c < tmp_tree[0];c++){
                  fprintf(stderr,"%d ",tmp_tree[c]);
            }
            fprintf(stderr,"\n\n");*/
            ntree_data =ntree_sub_alignment(ntree_data,tmp_tree,local_ntree);
            free(tmp_tree);
            
      }else{
            elements = local_ntree-2;
            milometer = malloc(sizeof(int)*(elements));
            for ( i = 0; i < elements;i++){
                  milometer[i] = 0;
            }

            i = 0;
            while(milometer[0] != -1){
      
                  tmp_tree[0] =1;
      
                  tmp = malloc(sizeof(struct tree_node));
                  tmp->left = 0;
                  tmp->right = 0;
                  tmp->label = -1;
                  tmp->edge = 0;
      
                  tmp->left = malloc(sizeof(struct tree_node));
                  tmp->left->left = 0;
                  tmp->left->right = 0;
                  tmp->left->edge = 1;
                  tmp->left->label = leaves[0];
                  tmp->right = malloc(sizeof(struct tree_node));
                  tmp->right->left = 0;
                  tmp->right->right = 0;
                  tmp->right->edge = 2;
                  tmp->right->label = leaves[1];
                  tree = malloc(sizeof(struct tree_node));
                  tree->left =tmp;
                  tree->right = 0;
                  tree->edge = -1;
                  tree->label = -1;
            
                  //printsimpleTree(tree);
                  //tree = simpleinsert(tree,0,3,-3);
                  //fprintf(stderr,"\n\n");
                  //printsimpleTree(tree);
                  newnode = 3;
                  for(c = 0; c < elements;c++){             
                  //    printf("%d ",milometer[c]);
                        tree = simpleinsert(tree,milometer[c],newnode,leaves[2+c]);
                        newnode+=2;
                  } 
                  fprintf(stderr,"Topology:%d   ",i);
                  //printsimpleTree(tree);
                  c = add_label_simpletree(tree,nodes,0);
                  
                  readsimpletree(tree,tmp_tree);
                  freesimpletree(tree);
                  /*for (c = 1; c < tmp_tree[0];c++){
                        fprintf(stderr,"%d ",tmp_tree[c]);
                  }
                  fprintf(stderr,"\n\n");*/
                  ntree_data =ntree_sub_alignment(ntree_data,tmp_tree,local_ntree);
                  
                  //exit(0);
                  //for (c = 0;c < ntree -1;c++){
                  //    fprintf(stderr,"%d ",nodes[c]);
                  //}
                  //fprintf(stderr,"\n\n");
                  i++;
                  milometer = ticker(milometer,elements);
            }
      
            free(milometer);
            free(tmp_tree);
      }
      return ntree_data;
}

int add_label_simpletree(struct tree_node* p,int* nodes,int i)
{
      if(p->left){
            i = add_label_simpletree(p->left,nodes,i);
      }
      if(p->right){
            i = add_label_simpletree(p->right,nodes,i);
      }
      if(p->left){
            if(p->right){
                  p->label = nodes[i];
                  i++;
                  return i;
            }
      }
      return i;
}

int* readsimpletree(struct tree_node* p,int* tree)
{
      if(p->left){
            tree = readsimpletree(p->left,tree);
      }
      if(p->right){
            tree = readsimpletree(p->right,tree);
      }
      if(p->left){
            if(p->right){
                  tree[tree[0]] = p->left->label;
                  tree[tree[0]+1] = p->right->label;
                  tree[tree[0]+2] = p->label;
                  tree[0] +=3;
      //          free(p->left);
      //          free(p->right);
      //    }else{
      //          free(p->left);
            }
      }//else{
      //    free(p->right);
      //}
      return tree;
}

void printsimpleTree(struct tree_node* p)
{
      if(p->left){
      printsimpleTree(p->left);
      }
      //fprintf(stderr,"%d\n",p->label);
      if(p->right){
      printsimpleTree(p->right);
      }
      if(p->left){
            if(p->right){
                  fprintf(stderr,"%d %d -> %d\n",p->left->label,p->right->label,p->label);
                  free(p->left);
                  free(p->right);
            }else{
                  free(p->left);
            }
      }else{
            free(p->right);
      }
      
//    fprintf(stderr,"Edge:%d Label:%d\n",p->edge,p->label);
}


struct tree_node* simpleinsert(struct tree_node* p,int target, int new_edge,int leaf_label)
{
      struct tree_node* tmp = 0;
      struct tree_node* tmp2 = 0;

      if(p->left){
            if(p->left->edge == target){
                  tmp = malloc(sizeof(struct tree_node));
                  tmp->left = 0;
                  tmp->right = 0;
                  tmp->label = leaf_label;
                  tmp->edge = new_edge+1;
                  
                  tmp2 = malloc(sizeof(struct tree_node));
                  tmp2->left = tmp;
                  tmp2->right = p->left;
                  tmp2->label = -1;
                  tmp2->edge = p->left->edge;

                  p->left->edge = new_edge;

                  p->left = tmp2;


                  return p;
            }else{
                  p->left = simpleinsert(p->left,target,new_edge,leaf_label);
            }
      }

      if(p->right){
            if(p->right->edge == target){
                  tmp = malloc(sizeof(struct tree_node));
                  tmp->left = 0;
                  tmp->right = 0;
                  tmp->label = leaf_label;
                  tmp->edge = new_edge+1;
                  
                  tmp2 = malloc(sizeof(struct tree_node));
                  tmp2->left = tmp;
                  tmp2->right = p->right;
                  tmp2->label = -1;
                  tmp2->edge = p->right->edge;

                  p->right->edge = new_edge;

                  p->right = tmp2;


                  return p;
            }else{
                  p->right = simpleinsert(p->right,target,new_edge,leaf_label);
            }
      }
      return p;
}


int* ticker(int* milometer,int elements)
{
      while(elements){
            if (milometer[elements-1] < (2*elements)){
                  milometer[elements-1]++;
                  return milometer;
            }else{
                  milometer[elements-1] = 0;
                  elements--;
            }
      }
      milometer[0] = -1;
      return milometer;
}




int* upgma(float **dm,int* tree)
{
      int i,j,t;
      int *as = 0;
      float max;
      int node_a = 0;
      int node_b = 0;
      int cnode = numseq;

      as = malloc(sizeof(int)*numseq);
      for (i = numseq; i--;){
            as[i] = i+1;
      }

      
      t = 0;
      while (cnode != numprofiles){
            max = -INFTY;
            for (i = 0;i < numseq-1; i++){
                  if (as[i]){
                  for ( j = i + 1;j < numseq;j++){
                        if (as[j]){
                        if (dm[i][j] > max){
                              max = dm[i][j];
                              node_a = i;
                              node_b = j;
                        }
                        }
                  }
                  }
            }
            
            tree[t] = as[node_a]-1;
            tree[t+1] = as[node_b]-1;
            tree[t+2] = cnode;
            t += 3;     
            
            /*deactivate  sequences to be joined*/
            as[node_a] = cnode+1;
            as[node_b] = 0;
            cnode++;    
            
            /*calculate new distances*/
            for (j = numseq;j--;){
                  if (j != node_b){
                        dm[node_a][j] = (dm[node_a][j] + dm[node_b][j])/2;
                  }
            }
            dm[node_a][node_a] = 0.0f;
            for (j = numseq;j--;){
                  dm[j][node_a] = dm[node_a][j];
                  dm[j][node_b] = 0.0f;
                  dm[node_b][j] = 0.0f;
            }           
      }
      free(as);
      return tree;
}



int* nj(float **dm,int* tree)
{
      int i,j;
      //float **dm = 0;
      float *r = 0;
      float *r_div = 0;
      int *active = 0;
      int node = 0;
      float min = 0;
      int join_a = 0;
      int join_b = 0;
      int leaves = 0;
      int c =0;

      leaves = numseq;
      
      r = malloc ((numseq*2-1) *sizeof(float));
      r_div = malloc ((numseq*2-1) *sizeof(float));
      active = malloc((numseq*2-1)*sizeof(int));
      for ( i = 0;i < numseq*2-1;i++){
            active[i] = 0;
      }
      for ( i = 0;i < numseq;i++){
            active[i] = 1;
      }
      
      node = numseq;
      while (node != numseq*2 -1){
            for (i = 0;i<numseq*2-1;i++){
                  if (active[i]){
                        r[i] = 0;
                        for (j = 0;j < numseq*2-1;j++){
                              if (active[j]){
                                    r[i] += (i<j) ?dm[i][j]:dm[j][i];
                              }
                        }
                        r_div[i] = r[i] / (leaves-2);
                  }
            }
            for ( j = 0;j < numseq*2-1;j++){
                  if (active[j]){
                  for ( i = j+1;i < numseq*2-1;i++){
                        if (active[i]){
                        dm[i][j] = dm[j][i] - (r[i] + r[j])/2;
                        }
                  }
                  }
            }
            min = -INFTY;
            for ( j = 0;j < numseq*2-1;j++){
                  if (active[j]){
                  for ( i = j+1;i < numseq*2-1;i++){
                        if (active[i]){
                              if (dm[i][j] > min){
                                    min = dm[i][j];
                                    join_a = j;
                                    join_b = i;
                              }
                        }
                  }
                  }
            }
            //join_a always smaller than join_b && both smaller than node
            dm[join_a][node] =  dm[join_a][join_b]/2 + (r_div[join_a] - r_div[join_b])/2;
            dm[join_b][node] =  dm[join_a][join_b] - dm[join_a][node];

            active[join_a] = 0;
            active[join_b] = 0;
            tree[c] = join_a;
            tree[c+1] = join_b;
            tree[c+2] = node;

            for (i = 0;i<numseq*2-1;i++){
                  if (active[i]){
                        dm[i][node] = (i>join_a) ? dm[join_a][i]: dm[i][join_a];
                        dm[i][node] -= dm[join_a][node];
                        dm[i][node] += (i > join_b) ? dm[join_b][i] : dm[i][join_b] ;
                        dm[i][node] -= dm[join_b][node];
                        dm[i][node] /= 2;
                  }
            }
            active[node] = 1;
            c += 3;
            node++;

      }


      for (i = numprofiles;i--;){
            free(dm[i]);
      }
      free(dm);

      free(r);
      free(r_div);
      free(active);

      return tree;
}



Generated by  Doxygen 1.6.0   Back to index