/* * $Id: hash_alph.c 4278 2009-12-23 09:58:37Z james_johnson $ * * $Log$ * Revision 1.2 2005/10/25 19:06:39 nadya * rm old macro for Header, all info is taken care of by Id and Log. * * Revision 1.1.1.1 2005/07/29 00:20:12 nadya * Importing from meme-3.0.14, and adding configure/make * */ /*********************************************************************** * * * MEME++ * * Copyright 1994, The Regents of the University of California * * Author: Timothy L. Bailey * * * * Modified: tlb 11-12-97; ***********************************************************************/ /* hash_alph */ #include "user.h" #include "macros.h" #include "hash_alph.h" /* Routines for hashing on the alphabet of sequences. */ /**********************************************************************/ /* setup_hash_alph Sets up the hash table for the given alphabet. There are three hash tables: x alphabet -- -------- 0: DNAB 1: PROTEINB 2: other (including DNA0 and PROTEIN0) This function can be called once for each of these alphabets, and then the current alphabet can be selected using setalph(x). Unused letters hash to -1. unhash(alength) will give 'X' (only works for alphabet set up last). If the alphabet is DNA0 or PROTEIN0, all ambiguous characters in the BLAST alphabet are hashed to 'X'. 
*/ /**********************************************************************/ extern int setup_hash_alph( char *alphabet /* the alphabet to set up hashing for */ ) { int i, alength; char c; /* get length of alphabet */ alength = strlen(alphabet); if (alength > MAXALPH) { fprintf(stderr, "Alphabet too long (> %d).\n", MAXALPH); exit(1); } /* determine if current alphabet DNAB or PROTEINB or other */ if (!strcmp(alphabet, DNAB)) { setalph(0); /* current alphabet DNAB */ } else if (!strcmp(alphabet, PROTEINB)) { setalph(1); /* current alphabet PROTEINB */ } else { setalph(2); /* current alphabet other */ } /* flag unused letters */ for (i=0; i /* NOTE(review): text appears to be missing from this copy between the loop header above and the fragment below (rest of this function and the start of the next one); restore from the upstream file before building */ subs[protbhash(L)]=1*/ char substr[MAXALPH+1]; /* string of substitution letters for codon */ char *protalph = PROTEINB; /* PROTEINB alphabet */ int protalen = strlen(PROTEINB);/* length of PROTEINB alphabet */ char *dnalph = DNAB; /* DNAB alphabet */ char aa; /* amino acid letter */ /* set length of DNAB alphabet for use by macro dnab2protb */ dnablen = strlen(DNAB); /* create the dnab2protb index table */ dnab2protb_index = NULL; Resize(dnab2protb_index, dnablen*dnablen*dnablen, int); /* For each possible DNAB codon, find all the translations of it (more than one if it contains ambiguous DNAB characters) and see if it matches one of the PROTEINB characters. Otherwise, map it to "X". Store the position in PROTEINB alphabet in the index table. */ for (i0=0; i0