/************************************************************************** * FILE: string-list.c * AUTHOR: William Stafford Noble * CREATE DATE: 12-22-98 * PROJECT: shared * COPYRIGHT: 1999-2008, WSN * VERSION: $Revision: 1.5 $ * DESCRIPTION: Data structure for manipulating a list of strings. **************************************************************************/ #include "string-list.h" #include "utils.h" #include #include /************************************************************************* * Primary data structure. *************************************************************************/ struct string_list_t { int num_strings; /* Number of strings in the array. */ int max_strings; /* Total amount of memory allocated. */ int longest_string; /* Length of the longest allowed string. */ char** strings; /* The strings themselves. */ double* scores; /* Scores for optional sorting of strings */ }; /************************************************************************* * Allocate dynamic memory for a string list. *************************************************************************/ #define DEFAULT_MAX_STRINGS 10 #define DEFAULT_STRING_LENGTH 1000 STRING_LIST_T* new_string_list () { STRING_LIST_T* new_list; /* The list being created. */ int i_string; /* Index into the list. */ new_list = (STRING_LIST_T*)mm_calloc(1, sizeof(STRING_LIST_T)); new_list->num_strings = 0; new_list->max_strings = DEFAULT_MAX_STRINGS; new_list->longest_string = DEFAULT_STRING_LENGTH; new_list->strings = (char**)mm_calloc(DEFAULT_MAX_STRINGS, sizeof(char*)); for (i_string = 0; i_string < DEFAULT_MAX_STRINGS; i_string++) { new_list->strings[i_string] = (char*)mm_calloc(DEFAULT_STRING_LENGTH + 1, sizeof(char)); } new_list->scores = (double*)mm_calloc(DEFAULT_MAX_STRINGS, sizeof(double)); return(new_list); } /************************************************************************* * Is the given list a null list? *************************************************************************/ static void check_null_list (STRING_LIST_T* a_list) { if (a_list == NULL) { die("Attempted to access null string list.\n"); } } /************************************************************************* * Get the length of the longest string in a list. *************************************************************************/ int max_string_length (STRING_LIST_T* a_list) { check_null_list(a_list); return(a_list->longest_string); } /************************************************************************* * Get the total number of strings in a list. *************************************************************************/ int get_num_strings (STRING_LIST_T* a_list) { check_null_list(a_list); return(a_list->num_strings); } /************************************************************************* * Make sure each string in the list is long enough. *************************************************************************/ static void resize_string_list (int new_length, STRING_LIST_T* a_list) { int i_string; if (new_length > a_list->longest_string) { a_list->longest_string = new_length + 1; for (i_string = 0; i_string < a_list->max_strings; i_string++) { a_list->strings[i_string] = (char*)mm_realloc(a_list->strings[i_string], a_list->longest_string * sizeof(char)); } } } /************************************************************************* * Get the nth string from a list. *************************************************************************/ char* get_nth_string ( int n, STRING_LIST_T* a_list) { check_null_list(a_list); if (n > a_list->max_strings) { die("Attempted to access string beyond end of list.\n"); } else if (n > a_list->num_strings) { die("Attempted to access uninitialized string.\n"); } assert(a_list->strings[n] != NULL); return(a_list->strings[n]); } void set_nth_string (char* new_string, int n, STRING_LIST_T* a_list) { /* Check bounds. */ get_nth_string(n, a_list); /* Make all the strings longer if this one is too long. */ resize_string_list((int)strlen(new_string), a_list); /* Copy the string into the list. */ strcpy(a_list->strings[n], new_string); } /************************************************************************* * Get nth score in a scored string list. *************************************************************************/ double get_nth_score (int n, STRING_LIST_T* a_list) { check_null_list(a_list); if (n > a_list->max_strings) { die("Attempted to access string beyond end of list.\n"); } else if (n > a_list->num_strings) { die("Attempted to access uninitialized string.\n"); } return(a_list->scores[n]); } void set_nth_score (double new_score, int n, STRING_LIST_T* a_list) { // Check bounds. get_nth_string(n, a_list); // Copy the score into the list. a_list->scores[n] = new_score; } /************************************************************************* * Extract an array of scores. Memory must be freed by the caller. *************************************************************************/ ARRAY_T* get_string_list_scores (STRING_LIST_T* a_list) { ARRAY_T* return_value = allocate_array(get_num_strings(a_list)); int num_strings = get_num_strings(a_list); int i_string; for (i_string = 0; i_string < num_strings; i_string++) { set_array_item(i_string, a_list->scores[i_string], return_value); } return(return_value); } /************************************************************************* * Append to the nth string. *************************************************************************/ void append_to_nth_string (char* new_string, int n, STRING_LIST_T* a_list) { /* Check bounds. */ get_nth_string(n, a_list); /* Make all the strings longer if this one is too long. */ resize_string_list(a_list->longest_string + strlen(new_string), a_list); /* Copy the string into the list. */ strcat(a_list->strings[n], new_string); } /************************************************************************* * Add a string to the end of a given list. *************************************************************************/ void add_string (char* a_string, STRING_LIST_T* a_list) { int i_string; check_null_list(a_list); /* Make sure we're not adding a null string. */ if (a_string == NULL) { die("Adding null string to string list."); } /* Reallocate space if there isn't any. */ if (a_list->num_strings >= a_list->max_strings) { a_list->strings = (char**)mm_realloc(a_list->strings, (a_list->max_strings + DEFAULT_MAX_STRINGS) * sizeof(char*)); for (i_string = 0; i_string < DEFAULT_MAX_STRINGS; i_string++) { a_list->strings[a_list->max_strings + i_string] = (char*)mm_calloc(a_list->longest_string + 1, sizeof(char)); } a_list->max_strings += DEFAULT_MAX_STRINGS; a_list->scores = (double*)mm_realloc(a_list->scores, (a_list->max_strings + DEFAULT_MAX_STRINGS) * sizeof(double)); } /* Make all the strings longer if this one is too long. */ resize_string_list((int)strlen(a_string), a_list); /* Put the string in the list. */ strcpy(a_list->strings[a_list->num_strings], a_string); a_list->scores[a_list->num_strings] = 0; (a_list->num_strings)++; } /************************************************************************* * Add a string with score to the end of a given list. *************************************************************************/ void add_string_with_score (char* a_string, STRING_LIST_T* a_list, double score) { int index = a_list->num_strings; add_string(a_string, a_list); a_list->scores[index] = score; } /************************************************************************* * Add a list of strings to the end of a given list. *************************************************************************/ void add_strings (STRING_LIST_T* source_list, STRING_LIST_T* target_list) { int i_string; check_null_list(source_list); check_null_list(target_list); for (i_string = 0; i_string < get_num_strings(source_list); i_string++) { add_string(get_nth_string(i_string, source_list), target_list); } } /************************************************************************* * Remove a string from a given list. * * It is an error if the string does not appear in the list. *************************************************************************/ void remove_string (char* a_string, STRING_LIST_T* a_list) { BOOLEAN_T found_it; /* Have we found the desired string? */ char* this_string; /* The current string. */ int i_string; /* Index of the current string.*/ check_null_list(a_list); /* Make sure the string exists in the list. */ if (!have_string(a_string, a_list)) { die("Attempted to remove string %s from list that doesn't contain it.\n", a_string); } found_it = FALSE; for (i_string = 0; i_string < get_num_strings(a_list) - 1; i_string++) { this_string = get_nth_string(i_string, a_list); /* Is this the string we are looking for? */ if (strcmp(this_string, a_string) == 0) { found_it = TRUE; } /* Copy the next string into the current location. */ if (found_it) { strcpy(this_string, get_nth_string(i_string + 1, a_list)); } } (a_list->num_strings)--; } /************************************************************************* * Remove a set of strings from a given list. * * It is an error for the source list to contain any strings that * don't appear in the target list. *************************************************************************/ void remove_strings (STRING_LIST_T* source_list, STRING_LIST_T* target_list) { int i_string; for (i_string = 0; i_string < get_num_strings(source_list); i_string++) { remove_string(get_nth_string(i_string, source_list), target_list); } } /************************************************************************* * Does a given string appear in a given list? *************************************************************************/ BOOLEAN_T have_string (char* a_string, STRING_LIST_T* a_list) { char* this_string; /* The current string in the list. */ int i_string; /* Index of the current string. */ check_null_list(a_list); for (i_string = 0; i_string < get_num_strings(a_list); i_string++) { this_string = get_nth_string(i_string, a_list); if (strcmp(this_string, a_string) == 0) { return(TRUE); } } return(FALSE); } /************************************************************************* * Return a index of a string in the given list. * Returns -1 if string is not in list. *************************************************************************/ int get_index_in_string_list(char* a_string, STRING_LIST_T* a_list) { char* this_string; /* The current string in the list. */ int i_string; /* Index of the current string. */ check_null_list(a_list); for (i_string = 0; i_string < get_num_strings(a_list); i_string++) { this_string = get_nth_string(i_string, a_list); if (strcmp(this_string, a_string) == 0) { return(i_string); } } // String was not found in list. return(-1); } /************************************************************************** * Make a copy of a string list. **************************************************************************/ STRING_LIST_T* copy_string_list (STRING_LIST_T* string_list) { int i_string; char* this_string; STRING_LIST_T* new_list; /* Create the new list. */ new_list = new_string_list(); /* Add stuff to it. */ for (i_string = 0; i_string < string_list->num_strings; i_string++) { this_string = get_nth_string(i_string, string_list); add_string(this_string, new_list); } /* Return it. */ return(new_list); } /************************************************************************* * A version of 'strcmp' suitable for passing to 'qsort'. *************************************************************************/ static int string_compare (const void * elem1, const void * elem2) { const char** string_ptr1 = (const char**)elem1; const char** string_ptr2 = (const char**)elem2; return(strcmp(*string_ptr1, *string_ptr2)); } /************************************************************************* * Compare the scores for 'qsort'. *************************************************************************/ /* I define this as a struct only so that the two items can be passed as a single pointer to the 'qsort' routine. */ typedef struct forqsort { double score; char* string; } FORQSORT_T; static int score_compare (const void * elem1, const void * elem2) { const double key1 = ((FORQSORT_T *)elem1)->score; const double key2 = ((FORQSORT_T *)elem2)->score; const char* string1 = ((FORQSORT_T *)elem1)->string; const char* string2 = ((FORQSORT_T *)elem2)->string; if (key1 < key2) { return(-1); } else if (key1 > key2) { return(1); } return(strcmp(string1, string2)); } static int score_compare_reverse (const void * elem1, const void * elem2) { return score_compare(elem2, elem1); } /************************************************************************* * Sort a list of strings in place. *************************************************************************/ void sort_string_list (STRING_LIST_T* a_list) { check_null_list(a_list); qsort((void *)(a_list->strings), a_list->num_strings, sizeof(char*), string_compare); } /************************************************************************* * Sort a list of strings by score. *************************************************************************/ void sort_string_list_by_score (STRING_LIST_T* a_list, BOOLEAN_T reverse) //Descending order { check_null_list(a_list); FORQSORT_T* forqsort; // Array of strings to be sorted. int num_stored = a_list->num_strings; // Number of strings to be sorted. int i_stored = 0; forqsort = (FORQSORT_T *)mm_malloc(sizeof(FORQSORT_T) * num_stored); for (i_stored = 0; i_stored < num_stored; i_stored++) { forqsort[i_stored].string = (char *)mm_malloc(sizeof(char) * (strlen(get_nth_string(i_stored, a_list)) + 1)); sprintf(forqsort[i_stored].string, "%s", get_nth_string(i_stored, a_list)); forqsort[i_stored].score = a_list->scores[i_stored]; } if (reverse) { qsort((void *)(forqsort), i_stored, sizeof(FORQSORT_T), score_compare_reverse); } else { qsort((void *)(forqsort), i_stored, sizeof(FORQSORT_T), score_compare); } // Transfer values in the original array for (i_stored = 0; i_stored < num_stored; i_stored++) { set_nth_string(forqsort[i_stored].string, i_stored, a_list); a_list->scores[i_stored] = forqsort[i_stored].score; } // Free up memory for the sorted sequences. for (i_stored = 0; i_stored < num_stored; i_stored++) { myfree(forqsort[i_stored].string); } myfree(forqsort); } /*************************************************************************** * Given two sets, A and B, find * - A intersect B, * - A minus B, and * - B minus A. * * Allocates memory for the three new string lists, which must be * freed by the caller. * * If any of the return parameters is supplied as NULL, then that * parameter will not be filled in ***************************************************************************/ void overlap_string_lists (STRING_LIST_T* list_a, STRING_LIST_T* list_b, STRING_LIST_T** intersection, STRING_LIST_T** a_minus_b, STRING_LIST_T** b_minus_a) { int i_all; /* Index of current string. */ char* this_string; /* The current string. */ check_null_list(list_a); check_null_list(list_b); /* Allocate memory for the new list. */ if (intersection != NULL) { *intersection = new_string_list(); } if (a_minus_b != NULL) { *a_minus_b = new_string_list(); } if (b_minus_a != NULL) { *b_minus_a = new_string_list(); } /* Enumerate the first list. */ for (i_all = 0; i_all < get_num_strings(list_a); i_all++) { this_string = get_nth_string(i_all, list_a); if (have_string(this_string, list_b)) { if (intersection != NULL) { add_string(this_string, *intersection); } } else { if (a_minus_b != NULL) { add_string(this_string, *a_minus_b); } } } /* Enumerate the second list. */ for (i_all = 0; i_all < get_num_strings(list_b); i_all++) { this_string = get_nth_string(i_all, list_b); if (!have_string(this_string, list_a)) { if (b_minus_a != NULL) { add_string(this_string, *b_minus_a); } } } } /************************************************************************* * Combine all the strings in a given list into a single string * separated by a given separator. * * Memory allocated for return value must be freed by caller. *************************************************************************/ char* combine_string_list (STRING_LIST_T* a_list, char* separator) { int string_length; /* Length of the combined string. */ char* combined_string; /* String to be returned. */ int i_string; /* Index of the current string. */ check_null_list(a_list); /* Compute the length of the return string. */ string_length = (get_num_strings(a_list) * (max_string_length(a_list) + strlen(separator))) + 2; /* Allocate memory for the string. */ combined_string = (char*)mm_calloc(string_length, sizeof(char)); /* Accumulate all the strings, with spaces between. */ for (i_string = 0; i_string < get_num_strings(a_list); i_string++) { strcat(combined_string, get_nth_string(i_string, a_list)); strcat(combined_string, separator); } /* Remove the final space. */ combined_string[strlen(combined_string) - strlen(separator)] = '\0'; /* Return the string. */ return(combined_string); } /***************************************************************************** * Read a list of strings from a given file. *****************************************************************************/ STRING_LIST_T* read_string_list (FILE* infile) { char this_line[DEFAULT_STRING_LENGTH]; char* fgets_result; STRING_LIST_T* return_value; /* Allocate dynamic memory for a maximal list of names. */ return_value = new_string_list(); /* Read the first name. */ fgets_result = fgets(this_line, DEFAULT_STRING_LENGTH, infile); this_line[strlen(this_line)-1] = 0; // chop /* Make sure we got at least one name. */ if (fgets_result == NULL) { die("Couldn't read a single name from the given file."); } while (fgets_result != NULL) { /* Store the name in the name list. */ add_string(this_line, return_value); /* Read the next name. */ fgets_result = fgets(this_line, DEFAULT_STRING_LENGTH, infile); this_line[strlen(this_line)-1] = 0; // chop } /* Die if we didn't read anything. */ if (get_num_strings(return_value) == 0) { die("Failed to read any names."); } /* Tell the user what's up. */ if (verbosity > NORMAL_VERBOSE) { fprintf(stderr, "Read %d names.\n", get_num_strings(return_value)); } return(return_value); } /***************************************************************************** * Read a list of strings from the named file. *****************************************************************************/ STRING_LIST_T* read_string_list_from_file (char* filename) { STRING_LIST_T* string_list; FILE* string_list_file = NULL; if (open_file( filename, "r", 1, "string list", "string list", &string_list_file ) == 0) { die("Couldn't open the file %s.\n", filename); } string_list = read_string_list(string_list_file); (void) fclose(string_list_file); if (verbosity > HIGH_VERBOSE) { fprintf(stderr, "Read string list: "); write_string_list(" ", string_list, stderr); } return string_list; } /************************************************************************* * Write out a list of strings. *************************************************************************/ void write_string_list (char* separator, /* String to separate strings in list. */ STRING_LIST_T* a_list, /* The list to be printed. */ FILE* outfile) { int i_string; check_null_list(a_list); /* Don't do anything if we go no strings. */ if (get_num_strings(a_list) == 0) { return; } for (i_string = 0; i_string < get_num_strings(a_list) - 1; i_string++) { fprintf(outfile, "%s%s", get_nth_string(i_string, a_list), separator); } fprintf(outfile, "%s\n", get_nth_string(i_string, a_list)); } /************************************************************************* * Free dynamic memory used by a list of strings. *************************************************************************/ void free_string_list (STRING_LIST_T* a_list) { int i_string; if (a_list == NULL) { return; } for (i_string = 0; i_string < a_list->max_strings; i_string++) { myfree(a_list->strings[i_string]); } myfree(a_list->strings); myfree(a_list->scores); myfree(a_list); } /************************************************************************* * Test two string lists for equality. Order matters. *************************************************************************/ BOOLEAN_T equal_string_lists (STRING_LIST_T* a_list, STRING_LIST_T* b_list) { int i_string; check_null_list(a_list); check_null_list(b_list); // If there are different numbers of strings, then they cannot be equal. if (get_num_strings(a_list) != get_num_strings(b_list)) { return(FALSE); } for (i_string = 0; i_string < get_num_strings(a_list); i_string++) { if (strcmp(get_nth_string(i_string, a_list), get_nth_string(i_string, b_list))) { return(FALSE); } } return(TRUE); } /************************************************************************* * Test whether a given string list contains duplicates. * * Optionally, print the duplicated strings to stderr. *************************************************************************/ BOOLEAN_T has_duplicates (char* message, // If message == "", then don't print the list to stderr. STRING_LIST_T* my_list) { BOOLEAN_T return_value = FALSE; BOOLEAN_T printed_message = FALSE; check_null_list(my_list); // Consider each string in the list. int i_string; int num_strings = get_num_strings(my_list); for (i_string = 0; i_string < num_strings; i_string++) { char* first_string = get_nth_string(i_string, my_list); // If a string appears more than twice, just print it the first time. BOOLEAN_T already_printed = FALSE; // Look for another occurrence of this string. int j_string; for (j_string = 0; j_string < num_strings; j_string++) { char* second_string = get_nth_string(j_string, my_list); // Are the strings the same, but with different indices? if ((i_string != j_string) && (strcmp(first_string, second_string) == 0)) { return_value = TRUE; // If requested, print the string. if ((strcmp(message, "") != 0) && (already_printed == FALSE)) { // Print the initial message, if one is given. if (printed_message == FALSE) { fprintf(stderr, "%s", message); printed_message = TRUE; } fprintf(stderr, " %s", first_string); already_printed = TRUE; } } } } // Print the final EOL. if (printed_message == TRUE) { fprintf(stderr, "\n"); } return(return_value); } /* * Local Variables: * mode: c * c-basic-offset: 2 * End: */