From aca3f4abae4013e48b2f407d92cdb17f54ffbf06 Mon Sep 17 00:00:00 2001 From: Bruno Haible Date: Tue, 26 Nov 2002 12:48:39 +0000 Subject: [PATCH] Move the search algorithm to search.h, search.cc. --- ChangeLog | 21 ++ src/Makefile.in | 12 +- src/gen-perf.cc | 363 -------------------- src/gen-perf.h | 53 --- src/input.cc | 4 + src/key-list.cc | 380 --------------------- src/keyword.cc | 8 +- src/keyword.h | 4 +- src/main.cc | 57 +++- src/search.cc | 628 +++++++++++++++++++++++++++++++++++ src/{key-list.h => search.h} | 81 +++-- 11 files changed, 747 insertions(+), 864 deletions(-) delete mode 100644 src/gen-perf.cc delete mode 100644 src/gen-perf.h delete mode 100644 src/key-list.cc create mode 100644 src/search.cc rename src/{key-list.h => search.h} (69%) diff --git a/ChangeLog b/ChangeLog index 1ffbcf4..282526e 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,5 +1,26 @@ 2002-11-02 Bruno Haible + * src/search.h: New file, combines src/key-list.h, src/gen-perf.h. + * src/search.cc: New file, combines src/key-list.cc, src/gen-perf.cc. + * src/key-list.h: Remove file. + * src/key-list.cc: Remove file. + * src/gen-perf.h: Remove file. + * src/gen-perf.cc: Remove file. + * src/main.cc (KeywordExt_Factory): Moved here from key-list.cc. + (main): Inline some code from gen-perf.cc. + * src/keyword.h (KeywordExt::init_selchars): Take the occurrences + vector as argument. + * src/keyword.cc (KeywordExt::init_selchars): Take the occurrences + vector as argument. + * src/input.cc (Input::set_output_types): Initialize _array_type, + _return_type, _struct_tag. + (Input::read_keys): Initialize _additional_code. + * src/Makefile.in (OBJECTS): Add search.o. + Remove key-list.o, gen-perf.o. + (KEY_LIST_H, GEN_PERF_H): Remove variables. + (gen-perf.o, key-list.o): Remove rules. + (search.o): New rule. + * *, */*: Update copyright notice to GPL version 2. * src/keyword-list.h (Keyword_List): New class. 
diff --git a/src/Makefile.in b/src/Makefile.in index 5647748..7b2a50d 100644 --- a/src/Makefile.in +++ b/src/Makefile.in @@ -61,9 +61,9 @@ SHELL = /bin/sh VPATH = $(srcdir) -OBJECTS = options.o main.o gen-perf.o key-list.o \ +OBJECTS = options.o main.o \ hash-table.o bool-array.o read-line.o vectors.o version.o \ - keyword.o keyword-list.o output.o input.o + keyword.o keyword-list.o output.o input.o search.o LIBS = ../lib/libgp.a @GPERF_LIBM@ CPPFLAGS = -I. -I$(srcdir)/../lib @@ -89,19 +89,13 @@ VERSION_H = version.h VECTORS_H = vectors.h READ_LINE_H = read-line.h read-line.icc OPTIONS_H = options.h options.icc -KEY_LIST_H = key-list.h $(VECTORS_H) $(READ_LINE_H) HASH_TABLE_H = hash-table.h BOOL_ARRAY_H = bool-array.h bool-array.icc $(OPTIONS_H) -GEN_PERF_H = gen-perf.h $(KEY_LIST_H) $(BOOL_ARRAY_H) bool-array.o : bool-array.cc $(BOOL_ARRAY_H) $(OPTIONS_H) $(CXX) $(CXXFLAGS) $(CPPFLAGS) -c $(srcdir)/bool-array.cc -gen-perf.o : gen-perf.cc $(GEN_PERF_H) $(OPTIONS_H) - $(CXX) $(CXXFLAGS) $(CPPFLAGS) -c $(srcdir)/gen-perf.cc hash-table.o : hash-table.cc $(HASH_TABLE_H) $(OPTIONS_H) $(CXX) $(CXXFLAGS) $(CPPFLAGS) -c $(srcdir)/hash-table.cc -key-list.o : key-list.cc $(KEY_LIST_H) $(OPTIONS_H) $(READ_LINE_H) $(HASH_TABLE_H) $(VERSION_H) - $(CXX) $(CXXFLAGS) $(CPPFLAGS) -c $(srcdir)/key-list.cc main.o : main.cc $(OPTIONS_H) $(GEN_PERF_H) $(CONFIG_H) $(CXX) $(CXXFLAGS) $(CPPFLAGS) -c $(srcdir)/main.cc options.o : options.cc $(OPTIONS_H) $(VECTORS_H) $(VERSION_H) @@ -120,6 +114,8 @@ output.o : output.cc output.h $(CXX) $(CXXFLAGS) $(CPPFLAGS) -c $(srcdir)/output.cc input.o : input.cc input.h $(CXX) $(CXXFLAGS) $(CPPFLAGS) -c $(srcdir)/input.cc +search.o : search.cc search.h + $(CXX) $(CXXFLAGS) $(CPPFLAGS) -c $(srcdir)/search.cc install : all force $(MKINSTALLDIRS) $(DESTDIR)$(bindir) diff --git a/src/gen-perf.cc b/src/gen-perf.cc deleted file mode 100644 index 3097798..0000000 --- a/src/gen-perf.cc +++ /dev/null @@ -1,363 +0,0 @@ -/* Provides high-level routines to 
manipulate the keywork list - structures the code generation output. - Copyright (C) 1989-1998, 2000, 2002 Free Software Foundation, Inc. - Written by Douglas C. Schmidt - and Bruno Haible . - - This file is part of GNU GPERF. - - GNU GPERF is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2, or (at your option) - any later version. - - GNU GPERF is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; see the file COPYING. - If not, write to the Free Software Foundation, Inc., - 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ - -#include -#include /* declares rand(), srand() */ -#include /* declares time() */ -#include "options.h" -#include "gen-perf.h" -#include "output.h" - -/* Efficiently returns the least power of two greater than or equal to X! */ -#define POW(X) ((!X)?1:(X-=1,X|=X>>1,X|=X>>2,X|=X>>4,X|=X>>8,X|=X>>16,(++X))) - -/* Reads input keys, possibly applies the reordering heuristic, sets the - maximum associated value size (rounded up to the nearest power of 2), - may initialize the associated values array, and determines the maximum - hash table size. Note: using the random numbers is often helpful, - though not as deterministic, of course! */ - -Gen_Perf::Gen_Perf () -{ - int asso_value_max; - int non_linked_length; - - Vectors::ALPHA_SIZE = (option[SEVENBIT] ? 
128 : 256); - read_keys (); - if (option[ORDER]) - reorder (); - _num_done = 1; - _fewest_collisions = 0; - asso_value_max = option.get_size_multiple (); - non_linked_length = Key_List::keyword_list_length (); - if (asso_value_max == 0) - asso_value_max = non_linked_length; - else if (asso_value_max > 0) - asso_value_max *= non_linked_length; - else /* if (asso_value_max < 0) */ - asso_value_max = non_linked_length / -asso_value_max; - set_asso_max (POW (asso_value_max)); - - if (option[RANDOM]) - { - srand (reinterpret_cast(time (0))); - - for (int i = 0; i < ALPHA_SIZE; i++) - _asso_values[i] = (rand () & asso_value_max - 1); - } - else - { - int asso_value = option.get_initial_asso_value (); - - if (asso_value) /* Initialize array if user requests non-zero default. */ - for (int i = ALPHA_SIZE - 1; i >= 0; i--) - _asso_values[i] = asso_value & get_asso_max () - 1; - } - _max_hash_value = Key_List::max_key_length () + get_asso_max () * - get_max_keysig_size (); - _collision_detector = new Bool_Array (_max_hash_value + 1); - - if (option[DEBUG]) - fprintf (stderr, "total non-linked keys = %d\nmaximum associated value is %d" - "\nmaximum size of generated hash table is %d\n", - non_linked_length, asso_value_max, _max_hash_value); -} - -/* Merge two disjoint hash key multisets to form the ordered disjoint union of the sets. - (In a multiset, an element can occur multiple times.) - Precondition: both set_1 and set_2 must be ordered. Returns the length - of the combined set. 
*/ - -inline int -Gen_Perf::compute_disjoint_union (const char *set_1, int size_1, const char *set_2, int size_2, char *set_3) -{ - char *base = set_3; - - while (size_1 > 0 && size_2 > 0) - if (*set_1 == *set_2) - set_1++, size_1--, set_2++, size_2--; - else - { - char next; - if (*set_1 < *set_2) - next = *set_1++, size_1--; - else - next = *set_2++, size_2--; - if (set_3 == base || next != set_3[-1]) - *set_3++ = next; - } - - while (size_1 > 0) - { - char next; - next = *set_1++, size_1--; - if (set_3 == base || next != set_3[-1]) - *set_3++ = next; - } - - while (size_2 > 0) - { - char next; - next = *set_2++, size_2--; - if (set_3 == base || next != set_3[-1]) - *set_3++ = next; - } - return set_3 - base; -} - -/* Sort the UNION_SET in increasing frequency of occurrence. - This speeds up later processing since we may assume the resulting - set (Set_3, in this case), is ordered. Uses insertion sort, since - the UNION_SET is typically short. */ - -inline void -Gen_Perf::sort_set (char *union_set, int len) -{ - int i, j; - - for (i = 0, j = len - 1; i < j; i++) - { - int curr; - char tmp; - - for (curr = i + 1, tmp = union_set[curr]; - curr > 0 && _occurrences[static_cast(tmp)] < _occurrences[static_cast(union_set[curr-1])]; - curr--) - union_set[curr] = union_set[curr - 1]; - - union_set[curr] = tmp; - } -} - -/* Generate a key set's hash value. */ - -inline int -Gen_Perf::hash (KeywordExt *key_node) -{ - int sum = option[NOLENGTH] ? 0 : key_node->_allchars_length; - - const char *p = key_node->_selchars; - int i = key_node->_selchars_length; - for (; i > 0; p++, i--) - sum += _asso_values[static_cast(*p)]; - - return key_node->_hash_value = sum; -} - -/* Find out how character value change affects successfully hashed items. - Returns FALSE if no other hash values are affected, else returns TRUE. 
- Note that because Option.Get_Asso_Max is a power of two we can guarantee - that all legal Asso_Values are visited without repetition since - Option.Get_Jump was forced to be an odd value! */ - -inline bool -Gen_Perf::affects_prev (char c, KeywordExt *curr) -{ - int original_char = _asso_values[static_cast(c)]; - int total_iterations = !option[FAST] - ? get_asso_max () : option.get_iterations () ? option.get_iterations () : keyword_list_length (); - - /* Try all legal associated values. */ - - for (int i = total_iterations - 1; i >= 0; i--) - { - int collisions = 0; - - _asso_values[static_cast(c)] = - (_asso_values[static_cast(c)] + (option.get_jump () ? option.get_jump () : rand ())) - & (get_asso_max () - 1); - - /* Iteration Number array is a win, O(1) intialization time! */ - _collision_detector->clear (); - - /* See how this asso_value change affects previous keywords. If - it does better than before we'll take it! */ - - for (KeywordExt_List *ptr = _head; ; ptr = ptr->rest()) - { - KeywordExt *keyword = ptr->first(); - if (_collision_detector->set_bit (hash (keyword)) - && ++collisions >= _fewest_collisions) - break; - if (keyword == curr) - { - _fewest_collisions = collisions; - if (option[DEBUG]) - fprintf (stderr, "- resolved after %d iterations", total_iterations - i); - return false; - } - } - } - - /* Restore original values, no more tries. */ - _asso_values[static_cast(c)] = original_char; - /* If we're this far it's time to try the next character.... */ - return true; -} - -/* Change a character value, try least-used characters first. 
*/ - -void -Gen_Perf::change (KeywordExt *prior, KeywordExt *curr) -{ - static char *union_set; - int union_set_length; - - if (!union_set) - union_set = new char [2 * get_max_keysig_size ()]; - - if (option[DEBUG]) - { - fprintf (stderr, "collision on keyword #%d, prior = \"%.*s\", curr = \"%.*s\" hash = %d\n", - _num_done, - prior->_allchars_length, prior->_allchars, - curr->_allchars_length, curr->_allchars, - curr->_hash_value); - fflush (stderr); - } - union_set_length = compute_disjoint_union (prior->_selchars, prior->_selchars_length, curr->_selchars, curr->_selchars_length, union_set); - sort_set (union_set, union_set_length); - - /* Try changing some values, if change doesn't alter other values continue normal action. */ - _fewest_collisions++; - - const char *p = union_set; - int i = union_set_length; - for (; i > 0; p++, i--) - if (!affects_prev (*p, curr)) - { - if (option[DEBUG]) - { - fprintf (stderr, " by changing asso_value['%c'] (char #%d) to %d\n", - *p, p - union_set + 1, _asso_values[static_cast(*p)]); - fflush (stderr); - } - return; /* Good, doesn't affect previous hash values, we'll take it. */ - } - - for (KeywordExt_List *ptr = _head; ; ptr = ptr->rest()) - { - KeywordExt* keyword = ptr->first(); - if (keyword == curr) - break; - hash (keyword); - } - - hash (curr); - - if (option[DEBUG]) - { - fprintf (stderr, "** collision not resolved after %d iterations, %d duplicates remain, continuing...\n", - !option[FAST] ? get_asso_max () : option.get_iterations () ? option.get_iterations () : keyword_list_length (), - _fewest_collisions + _total_duplicates); - fflush (stderr); - } -} - -/* Does the hard stuff.... - Initializes the Iteration Number array, and attempts to find a perfect - function that will hash all the key words without getting any - duplications. This is made much easier since we aren't attempting - to generate *minimum* functions, only perfect ones. 
- If we can't generate a perfect function in one pass *and* the user - hasn't enabled the DUP option, we'll inform the user to try the - randomization option, use -D, or choose alternative key positions. - The alternatives (e.g., back-tracking) are too time-consuming, i.e, - exponential in the number of keys. */ - -int -Gen_Perf::doit_all () -{ - KeywordExt_List *curr; - for (curr = _head; curr != NULL; curr = curr->rest()) - { - KeywordExt *currkw = curr->first(); - - hash (currkw); - - for (KeywordExt_List *ptr = _head; ptr != curr; ptr = ptr->rest()) - { - KeywordExt *ptrkw = ptr->first(); - - if (ptrkw->_hash_value == currkw->_hash_value) - { - change (ptrkw, currkw); - break; - } - } - _num_done++; - } - - /* Make one final check, just to make sure nothing weird happened.... */ - - _collision_detector->clear (); - - for (curr = _head; curr; curr = curr->rest()) - { - unsigned int hashcode = hash (curr->first()); - if (_collision_detector->set_bit (hashcode)) - { - if (option[DUP]) /* Keep track of this number... */ - _total_duplicates++; - else /* Yow, big problems. we're outta here! */ - { - fprintf (stderr, - "\nInternal error, duplicate value %d:\n" - "try options -D or -r, or use new key positions.\n\n", - hashcode); - return 1; - } - } - } - - /* Sorts the key word list by hash value, and then outputs the list. - The generated hash table code is only output if the early stage of - processing turned out O.K. */ - - sort (); - Output outputter (_head, _array_type, _return_type, _struct_tag, _additional_code, - _include_src, _total_keys, _total_duplicates, _max_key_len, - _min_key_len, this); - outputter.output (); - return 0; -} - -/* Prints out some diagnostics upon completion. 
*/ - -Gen_Perf::~Gen_Perf () -{ - if (option[DEBUG]) - { - fprintf (stderr, "\ndumping occurrence and associated values tables\n"); - - for (int i = 0; i < ALPHA_SIZE; i++) - if (_occurrences[i]) - fprintf (stderr, "asso_values[%c] = %6d, occurrences[%c] = %6d\n", - i, _asso_values[i], i, _occurrences[i]); - - fprintf (stderr, "end table dumping\n"); - - } - delete _collision_detector; -} - diff --git a/src/gen-perf.h b/src/gen-perf.h deleted file mode 100644 index bc3b927..0000000 --- a/src/gen-perf.h +++ /dev/null @@ -1,53 +0,0 @@ -/* This may look like C code, but it is really -*- C++ -*- */ - -/* Provides high-level routines to manipulate the keyword list - structures the code generation output. - - Copyright (C) 1989-1998, 2000, 2002 Free Software Foundation, Inc. - Written by Douglas C. Schmidt - and Bruno Haible . - - This file is part of GNU GPERF. - - GNU GPERF is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2, or (at your option) - any later version. - - GNU GPERF is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; see the file COPYING. - If not, write to the Free Software Foundation, Inc., - 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ - -#ifndef gen_perf_h -#define gen_perf_h 1 - -#include "key-list.h" -#include "bool-array.h" - -class Gen_Perf : private Key_List -{ -private: - int _max_hash_value; /* Maximum possible hash value. */ - int _fewest_collisions; /* Records fewest # of collisions for asso value. */ - int _num_done; /* Number of keywords processed without a collision. 
*/ - Bool_Array * _collision_detector; - - void change (KeywordExt *prior, KeywordExt *curr); - bool affects_prev (char c, KeywordExt *curr); - static int hash (KeywordExt *key_node); - static int compute_disjoint_union (const char *set_1, int size_1, const char *set_2, int size_2, char *set_3); - static void sort_set (char *union_set, int len); - -public: - Gen_Perf (); - ~Gen_Perf (); - int doit_all (); -}; - -#endif diff --git a/src/input.cc b/src/input.cc index ee1b062..bfba4c6 100644 --- a/src/input.cc +++ b/src/input.cc @@ -152,6 +152,9 @@ Input::strcspn (const char *s, const char *reject) void Input::set_output_types () { + _array_type = NULL; + _return_type = NULL; + _struct_tag = NULL; if (option[TYPE]) { _array_type = get_array_type (); @@ -348,6 +351,7 @@ Input::read_keys () temp->rest() = parse_line (ptr, delimiter); /* See if any additional C code is included at end of this file. */ + _additional_code = false; if (ptr) _additional_code = true; } diff --git a/src/key-list.cc b/src/key-list.cc deleted file mode 100644 index 92b2878..0000000 --- a/src/key-list.cc +++ /dev/null @@ -1,380 +0,0 @@ -/* Routines for building, ordering, and printing the keyword list. - Copyright (C) 1989-1998, 2000, 2002 Free Software Foundation, Inc. - Written by Douglas C. Schmidt - and Bruno Haible . - - This file is part of GNU GPERF. - - GNU GPERF is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2, or (at your option) - any later version. - - GNU GPERF is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; see the file COPYING. 
- If not, write to the Free Software Foundation, Inc., - 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ - -#include -#include /* declares exit() */ -#include /* defines INT_MIN, INT_MAX */ -#include "options.h" -#include "key-list.h" -#include "input.h" -#include "hash-table.h" - -/* Make the hash table 8 times larger than the number of keyword entries. */ -static const int TABLE_MULTIPLE = 10; - -/* Efficiently returns the least power of two greater than or equal to X! */ -#define POW(X) ((!X)?1:(X-=1,X|=X>>1,X|=X>>2,X|=X>>4,X|=X>>8,X|=X>>16,(++X))) - -bool Key_List::_determined[MAX_ALPHA_SIZE]; - -/* Destructor dumps diagnostics during debugging. */ - -Key_List::~Key_List () -{ - if (option[DEBUG]) - { - fprintf (stderr, "\nDumping key list information:\ntotal non-static linked keywords = %d" - "\ntotal keywords = %d\ntotal duplicates = %d\nmaximum key length = %d\n", - _list_len, _total_keys, _total_duplicates, _max_key_len); - dump (); - fprintf (stderr, "End dumping list.\n\n"); - } -} - -class KeywordExt_Factory : public Keyword_Factory -{ -virtual Keyword * create_keyword (const char *allchars, int allchars_length, - const char *rest); -}; - -Keyword * -KeywordExt_Factory::create_keyword (const char *allchars, int allchars_length, const char *rest) -{ - return new KeywordExt (allchars, allchars_length, rest); -} - -/* Reads in all keys from standard input and creates a linked list pointed - to by _head. This list is then quickly checked for "links", i.e., - unhashable elements possessing identical key sets and lengths. 
*/ - -void -Key_List::read_keys () -{ - KeywordExt_Factory factory; - Input inputter (&factory); - inputter.read_keys (); - _array_type = inputter._array_type; - _return_type = inputter._return_type; - _struct_tag = inputter._struct_tag; - _include_src = inputter._include_src; - _additional_code = inputter._additional_code; - _head = static_cast(inputter._head); - - KeywordExt_List *temp; - KeywordExt_List *trail = NULL; - - for (temp = _head; temp; temp = temp->rest()) - { - temp->first()->init_selchars(this); - _total_keys++; - } - - /* Hash table this number of times larger than keyword number. */ - int table_size = (_list_len = _total_keys) * TABLE_MULTIPLE; - /* Table must be a power of 2 for the hash function scheme to work. */ - KeywordExt **table = new KeywordExt*[POW (table_size)]; - - /* Make large hash table for efficiency. */ - Hash_Table found_link (table, table_size, option[NOLENGTH]); - - /* Test whether there are any links and also set the maximum length of - an identifier in the keyword list. */ - - for (temp = _head; temp; temp = temp->rest()) - { - KeywordExt *keyword = temp->first(); - KeywordExt *other_keyword = found_link.insert (keyword); - - /* Check for links. We deal with these by building an equivalence class - of all duplicate values (i.e., links) so that only 1 keyword is - representative of the entire collection. This *greatly* simplifies - processing during later stages of the program. */ - - if (other_keyword) - { - _total_duplicates++; - _list_len--; - trail->rest() = temp->rest(); - temp->first()->_duplicate_link = other_keyword->_duplicate_link; - other_keyword->_duplicate_link = temp->first(); - - /* Complain if user hasn't enabled the duplicate option. 
*/ - if (!option[DUP] || option[DEBUG]) - fprintf (stderr, "Key link: \"%.*s\" = \"%.*s\", with key set \"%.*s\".\n", - keyword->_allchars_length, keyword->_allchars, - other_keyword->_allchars_length, other_keyword->_allchars, - keyword->_selchars_length, keyword->_selchars); - } - else - trail = temp; - - /* Update minimum and maximum keyword length, if needed. */ - if (_max_key_len < keyword->_allchars_length) - _max_key_len = keyword->_allchars_length; - if (_min_key_len > keyword->_allchars_length) - _min_key_len = keyword->_allchars_length; - } - - delete[] table; - - /* Exit program if links exists and option[DUP] not set, since we can't continue */ - if (_total_duplicates) - { - if (option[DUP]) - fprintf (stderr, "%d input keys have identical hash values, examine output carefully...\n", - _total_duplicates); - else - { - fprintf (stderr, "%d input keys have identical hash values,\ntry different key positions or use option -D.\n", - _total_duplicates); - exit (1); - } - } - /* Exit program if an empty string is used as key, since the comparison - expressions don't work correctly for looking up an empty string. */ - if (_min_key_len == 0) - { - fprintf (stderr, "Empty input key is not allowed.\nTo recognize an empty input key, your code should check for\nlen == 0 before calling the gperf generated lookup function.\n"); - exit (1); - } -} - -/* Recursively merges two sorted lists together to form one sorted list. The - ordering criteria is by frequency of occurrence of elements in the key set - or by the hash value. This is a kludge, but permits nice sharing of - almost identical code without incurring the overhead of a function - call comparison. 
*/ - -KeywordExt_List * -Key_List::merge (KeywordExt_List *list1, KeywordExt_List *list2) -{ - KeywordExt_List *result; - KeywordExt_List **resultp = &result; - for (;;) - { - if (!list1) - { - *resultp = list2; - break; - } - if (!list2) - { - *resultp = list1; - break; - } - if (_occurrence_sort && list1->first()->_occurrence < list2->first()->_occurrence - || _hash_sort && list1->first()->_hash_value > list2->first()->_hash_value) - { - *resultp = list2; - resultp = &list2->rest(); list2 = list1; list1 = *resultp; - } - else - { - *resultp = list1; - resultp = &list1->rest(); list1 = *resultp; - } - } - return result; -} - -/* Applies the merge sort algorithm to recursively sort the key list by - frequency of occurrence of elements in the key set. */ - -KeywordExt_List * -Key_List::merge_sort (KeywordExt_List *head) -{ - if (!head || !head->rest()) - return head; - else - { - KeywordExt_List *middle = head; - KeywordExt_List *temp = head->rest()->rest(); - - while (temp) - { - temp = temp->rest(); - middle = middle->rest(); - if (temp) - temp = temp->rest(); - } - - temp = middle->rest(); - middle->rest() = 0; - return merge (merge_sort (head), merge_sort (temp)); - } -} - -/* Returns the frequency of occurrence of elements in the key set. */ - -inline int -Key_List::get_occurrence (KeywordExt *ptr) -{ - int value = 0; - - const char *p = ptr->_selchars; - unsigned int i = ptr->_selchars_length; - for (; i > 0; p++, i--) - value += _occurrences[static_cast(*p)]; - - return value; -} - -/* Enables the index location of all key set elements that are now - determined. */ - -inline void -Key_List::set_determined (KeywordExt *ptr) -{ - const char *p = ptr->_selchars; - unsigned int i = ptr->_selchars_length; - for (; i > 0; p++, i--) - _determined[static_cast(*p)] = true; -} - -/* Returns TRUE if PTR's key set is already completely determined. 
*/ - -inline bool -Key_List::already_determined (KeywordExt *ptr) -{ - bool is_determined = true; - - const char *p = ptr->_selchars; - unsigned int i = ptr->_selchars_length; - for (; is_determined && i > 0; p++, i--) - is_determined = _determined[static_cast(*p)]; - - return is_determined; -} - -/* Reorders the table by first sorting the list so that frequently occuring - keys appear first, and then the list is reordered so that keys whose values - are already determined will be placed towards the front of the list. This - helps prune the search time by handling inevitable collisions early in the - search process. See Cichelli's paper from Jan 1980 JACM for details.... */ - -void -Key_List::reorder () -{ - KeywordExt_List *ptr; - for (ptr = _head; ptr; ptr = ptr->rest()) - { - KeywordExt *keyword = ptr->first(); - - keyword->_occurrence = get_occurrence (keyword); - } - - _hash_sort = false; - _occurrence_sort = true; - - _head = merge_sort (_head); - - for (ptr = _head; ptr->rest(); ptr = ptr->rest()) - { - set_determined (ptr->first()); - - if (!already_determined (ptr->rest()->first())) - { - KeywordExt_List *trail_ptr = ptr->rest(); - KeywordExt_List *run_ptr = trail_ptr->rest(); - - for (; run_ptr; run_ptr = trail_ptr->rest()) - { - - if (already_determined (run_ptr->first())) - { - trail_ptr->rest() = run_ptr->rest(); - run_ptr->rest() = ptr->rest(); - ptr = ptr->rest() = run_ptr; - } - else - trail_ptr = run_ptr; - } - } - } -} - -/* Sorts the keys by hash value. */ - -void -Key_List::sort () -{ - _hash_sort = true; - _occurrence_sort = false; - - _head = merge_sort (_head); -} - -/* Dumps the key list to stderr stream. 
*/ - -void -Key_List::dump () -{ - int field_width = get_max_keysig_size (); - - fprintf (stderr, "\nList contents are:\n(hash value, key length, index, %*s, keyword):\n", - field_width, "selchars"); - - for (KeywordExt_List *ptr = _head; ptr; ptr = ptr->rest()) - fprintf (stderr, "%11d,%11d,%6d, %*.*s, %.*s\n", - ptr->first()->_hash_value, ptr->first()->_allchars_length, ptr->first()->_final_index, - field_width, ptr->first()->_selchars_length, ptr->first()->_selchars, - ptr->first()->_allchars_length, ptr->first()->_allchars); -} - -/* Simple-minded constructor action here... */ - -Key_List::Key_List () -{ - _total_keys = 0; - _max_key_len = INT_MIN; - _min_key_len = INT_MAX; - _array_type = 0; - _return_type = 0; - _struct_tag = 0; - _head = 0; - _total_duplicates = 0; - _additional_code = false; -} - -/* Returns the length of entire key list. */ - -int -Key_List::keyword_list_length () -{ - return _list_len; -} - -/* Returns length of longest key read. */ - -int -Key_List::max_key_length () -{ - return _max_key_len; -} - -/* Returns number of key positions. */ - -int -Key_List::get_max_keysig_size () -{ - return option[ALLCHARS] ? _max_key_len : option.get_max_keysig_size (); -} diff --git a/src/keyword.cc b/src/keyword.cc index c7a3d4b..948f708 100644 --- a/src/keyword.cc +++ b/src/keyword.cc @@ -60,8 +60,8 @@ static inline void sort_char_set (char *base, int len) } } -/* Initialize selchars and selchars_length, and update v->occurrences. */ -void KeywordExt::init_selchars (Vectors *v) +/* Initialize selchars and selchars_length, and update occurrences. */ +void KeywordExt::init_selchars (int *occurrences) { const char *k = _allchars; char *key_set = @@ -71,7 +71,7 @@ void KeywordExt::init_selchars (Vectors *v) if (option[ALLCHARS]) /* Use all the character positions in the KEY. 
*/ for (int i = _allchars_length; i > 0; k++, ptr++, i--) - v->_occurrences[static_cast(*ptr = *k)]++; + occurrences[static_cast(*ptr = *k)]++; else /* Only use those character positions specified by the user. */ { @@ -90,7 +90,7 @@ void KeywordExt::init_selchars (Vectors *v) else /* Out of range of KEY length, so we'll just skip it. */ continue; - v->_occurrences[static_cast(*ptr)]++; + occurrences[static_cast(*ptr)]++; ptr++; } diff --git a/src/keyword.h b/src/keyword.h index 164de99..d78cb0c 100644 --- a/src/keyword.h +++ b/src/keyword.h @@ -57,8 +57,8 @@ struct KeywordExt : public Keyword KeywordExt * _duplicate_link; /* Methods depending on the keyposition list. */ - /* Initialize selchars and selchars_length, and update v->occurrences. */ - void init_selchars (Vectors *v); + /* Initialize selchars and selchars_length, and update occurrences. */ + void init_selchars (int *occurrences); /* Data members used by the algorithm. */ int _occurrence; /* A metric for frequency of key set occurrences. */ diff --git a/src/main.cc b/src/main.cc index 3b3a866..f73bd7e 100644 --- a/src/main.cc +++ b/src/main.cc @@ -1,4 +1,4 @@ -/* Driver program for the Gen_Perf hash function generator +/* Driver program for the hash function generator Copyright (C) 1989-1998, 2000, 2002 Free Software Foundation, Inc. Written by Douglas C. Schmidt and Bruno Haible . @@ -20,26 +20,63 @@ If not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ -/* Simple driver program for the Gen_Perf.hash function generator. - Most of the hard work is done in class Gen_Perf and its class methods. */ - #include #include "options.h" -#include "gen-perf.h" +#include "input.h" +#include "search.h" +#include "output.h" + + +/* This Keyword factory produces KeywordExt instances. 
*/ + +class KeywordExt_Factory : public Keyword_Factory +{ +virtual Keyword * create_keyword (const char *allchars, int allchars_length, + const char *rest); +}; + +Keyword * +KeywordExt_Factory::create_keyword (const char *allchars, int allchars_length, const char *rest) +{ + return new KeywordExt (allchars, allchars_length, rest); +} + int main (int argc, char *argv[]) { - /* Sets the Options. */ + /* Set the Options. */ option.parse_options (argc, argv); - /* Initializes the key word list. */ - Gen_Perf generate_table; + /* Initialize the key word list. */ + KeywordExt_Factory factory; + Input inputter (&factory); + Vectors::ALPHA_SIZE = (option[SEVENBIT] ? 128 : 256); + inputter.read_keys (); + /* We can cast the keyword list to KeywordExt_List* because its list + elements were created by KeywordExt_Factory. */ + KeywordExt_List* list = static_cast(inputter._head); - /* Generates and prints the Gen_Perf hash table. */ - int status = generate_table.doit_all (); + /* Search for a good hash function. */ + Search searcher (list); + searcher.optimize (); + + /* Output the hash function code. */ + Output outputter (searcher._head, + inputter._array_type, + inputter._return_type, + inputter._struct_tag, + inputter._additional_code, + inputter._include_src, + searcher._total_keys, + searcher._total_duplicates, + searcher._max_key_len, + searcher._min_key_len, + &searcher); + outputter.output (); /* Check for write error on stdout. */ + int status = 0; if (fflush (stdout) || ferror (stdout)) status = 1; diff --git a/src/search.cc b/src/search.cc new file mode 100644 index 0000000..0a16055 --- /dev/null +++ b/src/search.cc @@ -0,0 +1,628 @@ +/* Search algorithm. + Copyright (C) 1989-1998, 2000, 2002 Free Software Foundation, Inc. + Written by Douglas C. Schmidt + and Bruno Haible . + + This file is part of GNU GPERF. 
+ + GNU GPERF is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version. + + GNU GPERF is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; see the file COPYING. + If not, write to the Free Software Foundation, Inc., + 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ + +#include +#include /* declares exit(), rand(), srand() */ +#include /* declares memset(), memcmp() */ +#include /* declares time() */ +#include /* defines INT_MIN, INT_MAX */ +#include "options.h" +#include "hash-table.h" +#include "search.h" + +/* Make the hash table 10 times larger than the number of keyword entries. */ +static const int TABLE_MULTIPLE = 10; + +/* Efficiently returns the least power of two greater than or equal to X! */ +#define POW(X) ((!X)?1:(X-=1,X|=X>>1,X|=X>>2,X|=X>>4,X|=X>>8,X|=X>>16,(++X))) + +Search::Search (KeywordExt_List *list) + : _head (list) +{ +} + +bool Search::_determined[MAX_ALPHA_SIZE]; + +void +Search::prepare () +{ + KeywordExt_List *temp; + KeywordExt_List *trail = NULL; + + _total_keys = 0; + for (temp = _head; temp; temp = temp->rest()) + { + temp->first()->init_selchars(_occurrences); + _total_keys++; + } + + /* Hash table this number of times larger than keyword number. */ + int table_size = (_list_len = _total_keys) * TABLE_MULTIPLE; + /* Table must be a power of 2 for the hash function scheme to work. */ + KeywordExt **table = new KeywordExt*[POW (table_size)]; + + /* Make large hash table for efficiency.
*/ + Hash_Table found_link (table, table_size, option[NOLENGTH]); + + /* Test whether there are any links and also set the maximum length of + an identifier in the keyword list. */ + _total_duplicates = 0; + _max_key_len = INT_MIN; + _min_key_len = INT_MAX; + for (temp = _head; temp; temp = temp->rest()) + { + KeywordExt *keyword = temp->first(); + KeywordExt *other_keyword = found_link.insert (keyword); + + /* Check for links. We deal with these by building an equivalence class + of all duplicate values (i.e., links) so that only 1 keyword is + representative of the entire collection. This *greatly* simplifies + processing during later stages of the program. */ + + if (other_keyword) + { + _total_duplicates++; + _list_len--; + trail->rest() = temp->rest(); + temp->first()->_duplicate_link = other_keyword->_duplicate_link; + other_keyword->_duplicate_link = temp->first(); + + /* Complain if user hasn't enabled the duplicate option. */ + if (!option[DUP] || option[DEBUG]) + fprintf (stderr, "Key link: \"%.*s\" = \"%.*s\", with key set \"%.*s\".\n", + keyword->_allchars_length, keyword->_allchars, + other_keyword->_allchars_length, other_keyword->_allchars, + keyword->_selchars_length, keyword->_selchars); + } + else + trail = temp; + + /* Update minimum and maximum keyword length, if needed. 
*/ + if (_max_key_len < keyword->_allchars_length) + _max_key_len = keyword->_allchars_length; + if (_min_key_len > keyword->_allchars_length) + _min_key_len = keyword->_allchars_length; + } + + delete[] table; + + /* Exit program if links exist and option[DUP] not set, since we can't continue */ + if (_total_duplicates) + { + if (option[DUP]) + fprintf (stderr, "%d input keys have identical hash values, examine output carefully...\n", + _total_duplicates); + else + { + fprintf (stderr, "%d input keys have identical hash values,\ntry different key positions or use option -D.\n", + _total_duplicates); + exit (1); + } + } + /* Exit program if an empty string is used as key, since the comparison + expressions don't work correctly for looking up an empty string. */ + if (_min_key_len == 0) + { + fprintf (stderr, "Empty input key is not allowed.\nTo recognize an empty input key, your code should check for\nlen == 0 before calling the gperf generated lookup function.\n"); + exit (1); + } +} + +/* Merges two sorted lists together to form one sorted list. The + ordering criterion is by frequency of occurrence of elements in the key set + or by the hash value. This is a kludge, but permits nice sharing of + almost identical code without incurring the overhead of a function + call comparison.
*/ + +KeywordExt_List * +Search::merge (KeywordExt_List *list1, KeywordExt_List *list2) +{ + KeywordExt_List *result; + KeywordExt_List **resultp = &result; + for (;;) + { + if (!list1) + { + *resultp = list2; + break; + } + if (!list2) + { + *resultp = list1; + break; + } + if (_occurrence_sort && list1->first()->_occurrence < list2->first()->_occurrence + || _hash_sort && list1->first()->_hash_value > list2->first()->_hash_value) + { + *resultp = list2; + resultp = &list2->rest(); list2 = list1; list1 = *resultp; + } + else + { + *resultp = list1; + resultp = &list1->rest(); list1 = *resultp; + } + } + return result; +} + +/* Applies the merge sort algorithm to recursively sort the key list by + frequency of occurrence of elements in the key set. */ + +KeywordExt_List * +Search::merge_sort (KeywordExt_List *head) +{ + if (!head || !head->rest()) + return head; + else + { + KeywordExt_List *middle = head; + KeywordExt_List *temp = head->rest()->rest(); + + while (temp) + { + temp = temp->rest(); + middle = middle->rest(); + if (temp) + temp = temp->rest(); + } + + temp = middle->rest(); + middle->rest() = 0; + return merge (merge_sort (head), merge_sort (temp)); + } +} + +/* Returns the frequency of occurrence of elements in the key set. */ + +inline int +Search::get_occurrence (KeywordExt *ptr) +{ + int value = 0; + + const char *p = ptr->_selchars; + unsigned int i = ptr->_selchars_length; + for (; i > 0; p++, i--) + value += _occurrences[static_cast(*p)]; + + return value; +} + +/* Enables the index location of all key set elements that are now + determined. */ + +inline void +Search::set_determined (KeywordExt *ptr) +{ + const char *p = ptr->_selchars; + unsigned int i = ptr->_selchars_length; + for (; i > 0; p++, i--) + _determined[static_cast(*p)] = true; +} + +/* Returns TRUE if PTR's key set is already completely determined. 
*/ + +inline bool +Search::already_determined (KeywordExt *ptr) +{ + bool is_determined = true; + + const char *p = ptr->_selchars; + unsigned int i = ptr->_selchars_length; + for (; is_determined && i > 0; p++, i--) + is_determined = _determined[static_cast(*p)]; + + return is_determined; +} + +/* Reorders the table by first sorting the list so that frequently occurring + keys appear first, and then the list is reordered so that keys whose values + are already determined will be placed towards the front of the list. This + helps prune the search time by handling inevitable collisions early in the + search process. See Cichelli's paper from Jan 1980 JACM for details.... */ + +void +Search::reorder () +{ + KeywordExt_List *ptr; + for (ptr = _head; ptr; ptr = ptr->rest()) + { + KeywordExt *keyword = ptr->first(); + + keyword->_occurrence = get_occurrence (keyword); + } + + _hash_sort = false; + _occurrence_sort = true; + + _head = merge_sort (_head); + + for (ptr = _head; ptr->rest(); ptr = ptr->rest()) + { + set_determined (ptr->first()); + + if (!already_determined (ptr->rest()->first())) + { + KeywordExt_List *trail_ptr = ptr->rest(); + KeywordExt_List *run_ptr = trail_ptr->rest(); + + for (; run_ptr; run_ptr = trail_ptr->rest()) + { + + if (already_determined (run_ptr->first())) + { + trail_ptr->rest() = run_ptr->rest(); + run_ptr->rest() = ptr->rest(); + ptr = ptr->rest() = run_ptr; + } + else + trail_ptr = run_ptr; + } + } + } +} + +/* Returns the length of entire key list. */ + +int +Search::keyword_list_length () +{ + return _list_len; +} + +/* Returns length of longest key read. */ + +int +Search::max_key_length () +{ + return _max_key_len; +} + +/* Returns number of key positions. */ + +int +Search::get_max_keysig_size () +{ + return option[ALLCHARS] ? _max_key_len : option.get_max_keysig_size (); +} + +/* Generate a key set's hash value. */ + +inline int +Search::hash (KeywordExt *key_node) +{ + int sum = option[NOLENGTH] ?
0 : key_node->_allchars_length; + + const char *p = key_node->_selchars; + int i = key_node->_selchars_length; + for (; i > 0; p++, i--) + sum += _asso_values[static_cast(*p)]; + + return key_node->_hash_value = sum; +} + +/* Merge two disjoint hash key multisets to form the ordered disjoint union of the sets. + (In a multiset, an element can occur multiple times.) + Precondition: both set_1 and set_2 must be ordered. Returns the length + of the combined set. */ + +inline int +Search::compute_disjoint_union (const char *set_1, int size_1, const char *set_2, int size_2, char *set_3) +{ + char *base = set_3; + + while (size_1 > 0 && size_2 > 0) + if (*set_1 == *set_2) + set_1++, size_1--, set_2++, size_2--; + else + { + char next; + if (*set_1 < *set_2) + next = *set_1++, size_1--; + else + next = *set_2++, size_2--; + if (set_3 == base || next != set_3[-1]) + *set_3++ = next; + } + + while (size_1 > 0) + { + char next; + next = *set_1++, size_1--; + if (set_3 == base || next != set_3[-1]) + *set_3++ = next; + } + + while (size_2 > 0) + { + char next; + next = *set_2++, size_2--; + if (set_3 == base || next != set_3[-1]) + *set_3++ = next; + } + return set_3 - base; +} + +/* Sort the UNION_SET in increasing frequency of occurrence. + This speeds up later processing since we may assume the resulting + set (Set_3, in this case), is ordered. Uses insertion sort, since + the UNION_SET is typically short. */ + +inline void +Search::sort_set (char *union_set, int len) +{ + int i, j; + + for (i = 0, j = len - 1; i < j; i++) + { + int curr; + char tmp; + + for (curr = i + 1, tmp = union_set[curr]; + curr > 0 && _occurrences[static_cast(tmp)] < _occurrences[static_cast(union_set[curr-1])]; + curr--) + union_set[curr] = union_set[curr - 1]; + + union_set[curr] = tmp; + } +} + +/* Find out how character value change affects successfully hashed items. + Returns FALSE if no other hash values are affected, else returns TRUE. 
+ Note that because Option.Get_Asso_Max is a power of two we can guarantee + that all legal Asso_Values are visited without repetition since + Option.Get_Jump was forced to be an odd value! */ + +inline bool +Search::affects_prev (char c, KeywordExt *curr) +{ + int original_char = _asso_values[static_cast(c)]; + int total_iterations = !option[FAST] + ? get_asso_max () : option.get_iterations () ? option.get_iterations () : keyword_list_length (); + + /* Try all legal associated values. */ + + for (int i = total_iterations - 1; i >= 0; i--) + { + int collisions = 0; + + _asso_values[static_cast(c)] = + (_asso_values[static_cast(c)] + (option.get_jump () ? option.get_jump () : rand ())) + & (get_asso_max () - 1); + + /* Iteration Number array is a win, O(1) initialization time! */ + _collision_detector->clear (); + + /* See how this asso_value change affects previous keywords. If + it does better than before we'll take it! */ + + for (KeywordExt_List *ptr = _head; ; ptr = ptr->rest()) + { + KeywordExt *keyword = ptr->first(); + if (_collision_detector->set_bit (hash (keyword)) + && ++collisions >= _fewest_collisions) + break; + if (keyword == curr) + { + _fewest_collisions = collisions; + if (option[DEBUG]) + fprintf (stderr, "- resolved after %d iterations", total_iterations - i); + return false; + } + } + } + + /* Restore original values, no more tries. */ + _asso_values[static_cast(c)] = original_char; + /* If we're this far it's time to try the next character.... */ + return true; +} + +/* Change a character value, try least-used characters first.
*/ + +void +Search::change (KeywordExt *prior, KeywordExt *curr) +{ + static char *union_set; + int union_set_length; + + if (!union_set) + union_set = new char [2 * get_max_keysig_size ()]; + + if (option[DEBUG]) + { + fprintf (stderr, "collision on keyword #%d, prior = \"%.*s\", curr = \"%.*s\" hash = %d\n", + _num_done, + prior->_allchars_length, prior->_allchars, + curr->_allchars_length, curr->_allchars, + curr->_hash_value); + fflush (stderr); + } + union_set_length = compute_disjoint_union (prior->_selchars, prior->_selchars_length, curr->_selchars, curr->_selchars_length, union_set); + sort_set (union_set, union_set_length); + + /* Try changing some values, if change doesn't alter other values continue normal action. */ + _fewest_collisions++; + + const char *p = union_set; + int i = union_set_length; + for (; i > 0; p++, i--) + if (!affects_prev (*p, curr)) + { + if (option[DEBUG]) + { + fprintf (stderr, " by changing asso_value['%c'] (char #%d) to %d\n", + *p, p - union_set + 1, _asso_values[static_cast(*p)]); + fflush (stderr); + } + return; /* Good, doesn't affect previous hash values, we'll take it. */ + } + + for (KeywordExt_List *ptr = _head; ; ptr = ptr->rest()) + { + KeywordExt* keyword = ptr->first(); + if (keyword == curr) + break; + hash (keyword); + } + + hash (curr); + + if (option[DEBUG]) + { + fprintf (stderr, "** collision not resolved after %d iterations, %d duplicates remain, continuing...\n", + !option[FAST] ? get_asso_max () : option.get_iterations () ? option.get_iterations () : keyword_list_length (), + _fewest_collisions + _total_duplicates); + fflush (stderr); + } +} + +/* Sorts the keys by hash value. 
*/ + +void +Search::sort () +{ + _hash_sort = true; + _occurrence_sort = false; + + _head = merge_sort (_head); +} + +void +Search::optimize () +{ + prepare (); + if (option[ORDER]) + reorder (); + _num_done = 1; + _fewest_collisions = 0; + int asso_value_max = option.get_size_multiple (); + int non_linked_length = keyword_list_length (); + if (asso_value_max == 0) + asso_value_max = non_linked_length; + else if (asso_value_max > 0) + asso_value_max *= non_linked_length; + else /* if (asso_value_max < 0) */ + asso_value_max = non_linked_length / -asso_value_max; + set_asso_max (POW (asso_value_max)); + + if (option[RANDOM]) + { + srand (reinterpret_cast(time (0))); + + for (int i = 0; i < ALPHA_SIZE; i++) + _asso_values[i] = (rand () & asso_value_max - 1); + } + else + { + int asso_value = option.get_initial_asso_value (); + + if (asso_value) /* Initialize array if user requests non-zero default. */ + for (int i = ALPHA_SIZE - 1; i >= 0; i--) + _asso_values[i] = asso_value & get_asso_max () - 1; + } + _max_hash_value = max_key_length () + get_asso_max () * get_max_keysig_size (); + _collision_detector = new Bool_Array (_max_hash_value + 1); + + if (option[DEBUG]) + fprintf (stderr, "total non-linked keys = %d\nmaximum associated value is %d" + "\nmaximum size of generated hash table is %d\n", + non_linked_length, asso_value_max, _max_hash_value); + + KeywordExt_List *curr; + for (curr = _head; curr != NULL; curr = curr->rest()) + { + KeywordExt *currkw = curr->first(); + + hash (currkw); + + for (KeywordExt_List *ptr = _head; ptr != curr; ptr = ptr->rest()) + { + KeywordExt *ptrkw = ptr->first(); + + if (ptrkw->_hash_value == currkw->_hash_value) + { + change (ptrkw, currkw); + break; + } + } + _num_done++; + } + + /* Make one final check, just to make sure nothing weird happened.... 
*/ + + _collision_detector->clear (); + + for (curr = _head; curr; curr = curr->rest()) + { + unsigned int hashcode = hash (curr->first()); + if (_collision_detector->set_bit (hashcode)) + { + if (option[DUP]) /* Keep track of this number... */ + _total_duplicates++; + else /* Yow, big problems. we're outta here! */ + { + fprintf (stderr, + "\nInternal error, duplicate value %d:\n" + "try options -D or -r, or use new key positions.\n\n", + hashcode); + exit (1); + } + } + } + + /* Sorts the key word list by hash value. */ + sort (); +} + +/* Prints out some diagnostics upon completion. */ + +Search::~Search () +{ + delete _collision_detector; + if (option[DEBUG]) + { + fprintf (stderr, "\ndumping occurrence and associated values tables\n"); + + for (int i = 0; i < ALPHA_SIZE; i++) + if (_occurrences[i]) + fprintf (stderr, "asso_values[%c] = %6d, occurrences[%c] = %6d\n", + i, _asso_values[i], i, _occurrences[i]); + + fprintf (stderr, "end table dumping\n"); + + fprintf (stderr, "\nDumping key list information:\ntotal non-static linked keywords = %d" + "\ntotal keywords = %d\ntotal duplicates = %d\nmaximum key length = %d\n", + _list_len, _total_keys, _total_duplicates, _max_key_len); + + int field_width = get_max_keysig_size (); + fprintf (stderr, "\nList contents are:\n(hash value, key length, index, %*s, keyword):\n", + field_width, "selchars"); + for (KeywordExt_List *ptr = _head; ptr; ptr = ptr->rest()) + fprintf (stderr, "%11d,%11d,%6d, %*.*s, %.*s\n", + ptr->first()->_hash_value, ptr->first()->_allchars_length, ptr->first()->_final_index, + field_width, ptr->first()->_selchars_length, ptr->first()->_selchars, + ptr->first()->_allchars_length, ptr->first()->_allchars); + + fprintf (stderr, "End dumping list.\n\n"); + } +} diff --git a/src/key-list.h b/src/search.h similarity index 69% rename from src/key-list.h rename to src/search.h index a12b97f..4e94bc2 100644 --- a/src/key-list.h +++ b/src/search.h @@ -1,8 +1,8 @@ /* This may look like C code, but it is 
really -*- C++ -*- */ -/* Data and function member declarations for the keyword list class. +/* Search algorithm. - Copyright (C) 1989-1998, 2002 Free Software Foundation, Inc. + Copyright (C) 1989-1998, 2000, 2002 Free Software Foundation, Inc. Written by Douglas C. Schmidt and Bruno Haible . @@ -23,59 +23,52 @@ If not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ -/* The key word list is a useful abstraction that keeps track of - various pieces of information that enable that fast generation - of the Gen_Perf.hash function. A Key_List is a singly-linked - list of List_Nodes. */ - -#ifndef key_list_h -#define key_list_h 1 +#ifndef search_h +#define search_h 1 #include "keyword-list.h" #include "vectors.h" -#include "read-line.h" +#include "bool-array.h" -class Key_List : public Vectors +class Search : public Vectors { -protected: - const char * _array_type; /* Pointer to the type for word list. */ - const char * _return_type; /* Pointer to return type for lookup function. */ - const char * _struct_tag; /* Shorthand for user-defined struct tag type. */ - const char * _include_src; /* C source code to be included verbatim. 
*/ +public: + Search (KeywordExt_List *list); + ~Search (); + void optimize (); +private: + void prepare (); + KeywordExt_List * merge (KeywordExt_List *list1, KeywordExt_List *list2); + KeywordExt_List * merge_sort (KeywordExt_List *head); + static int get_occurrence (KeywordExt *ptr); + static void set_determined (KeywordExt *ptr); + static bool already_determined (KeywordExt *ptr); + void reorder (); + int keyword_list_length (); + int max_key_length (); + int get_max_keysig_size (); + static int hash (KeywordExt *key_node); + static int compute_disjoint_union (const char *set_1, int size_1, const char *set_2, int size_2, char *set_3); + static void sort_set (char *union_set, int len); + bool affects_prev (char c, KeywordExt *curr); + void change (KeywordExt *prior, KeywordExt *curr); + void sort (); +public: + KeywordExt_List * _head; /* Points to the head of the linked list. */ + int _total_keys; /* Total number of keys, counting duplicates. */ + int _total_duplicates; /* Total number of duplicate hash values. */ int _max_key_len; /* Maximum length of the longest keyword. */ int _min_key_len; /* Minimum length of the shortest keyword. */ private: + int _list_len; /* Length of head's Key_List, not counting duplicates. */ bool _occurrence_sort; /* True if sorting by occurrence. */ bool _hash_sort; /* True if sorting by hash value. */ -protected: - bool _additional_code; /* True if any additional C code is included. */ -private: - int _list_len; /* Length of head's Key_List, not counting duplicates. */ -protected: - int _total_keys; /* Total number of keys, counting duplicates. */ - int _size; /* Range of the hash table. */ -private: static bool _determined[MAX_ALPHA_SIZE]; /* Used in function reorder, below. 
*/ - static int get_occurrence (KeywordExt *ptr); - static bool already_determined (KeywordExt *ptr); - static void set_determined (KeywordExt *ptr); - void dump (); - KeywordExt_List * merge (KeywordExt_List *list1, KeywordExt_List *list2); - KeywordExt_List * merge_sort (KeywordExt_List *head); - -protected: - KeywordExt_List * _head; /* Points to the head of the linked list. */ - int _total_duplicates; /* Total number of duplicate hash values. */ - -public: - Key_List (); - ~Key_List (); - int keyword_list_length (); - int max_key_length (); - void reorder (); - void sort (); - void read_keys (); - int get_max_keysig_size (); + int _num_done; /* Number of keywords processed without a collision. */ + int _fewest_collisions; /* Records fewest # of collisions for asso value. */ + int _max_hash_value; /* Maximum possible hash value. */ + Bool_Array * _collision_detector; + int _size; /* Range of the hash table. */ void set_asso_max (int r) { _size = r; } int get_asso_max () { return _size; } };