1
0
mirror of https://git.savannah.gnu.org/git/gperf.git synced 2025-12-02 13:09:22 +00:00

Optimize: Use a hash table in compute_partition.

This reduces the execution time of gperf on large inputs by ca. 30%.

* autogen.sh (GNULIB_MODULES): Add map-c++, hash-map.
* src/keyword.h: Include <stddef.h>.
(struct KeywordExt): Add fields _undetermined_chars,
_undetermined_chars_length, _undetermined_chars_hashcode.
* src/search.cc: Include gl_map.hh, gl_hash_map.h.
(Search::prepare_asso_values): Initialize the _undetermined_chars field.
(struct EquivalenceClass): Remove the fields _undetermined_chars,
_undetermined_chars_length.
(undetermined_equals, undetermined_hashcode): New functions.
(Search::compute_partition): Initialize the _undetermined_chars* fields
of all keywords. Use a hash map instead of a loop over the equivalence
classes.
(Search::find_good_asso_values): Deallocate the _undetermined_chars field.
This commit is contained in:
Bruno Haible
2025-04-19 15:03:01 +02:00
parent 43fa5ebcb7
commit 772a63e46d
5 changed files with 118 additions and 25 deletions

View File

@@ -1,3 +1,21 @@
2025-04-19 Bruno Haible <bruno@clisp.org>
Optimize: Use a hash table in compute_partition.
This reduces the execution time of gperf on large inputs by ca. 30%.
* autogen.sh (GNULIB_MODULES): Add map-c++, hash-map.
* src/keyword.h: Include <stddef.h>.
(struct KeywordExt): Add fields _undetermined_chars,
_undetermined_chars_length, _undetermined_chars_hashcode.
* src/search.cc: Include gl_map.hh, gl_hash_map.h.
(Search::prepare_asso_values): Initialize the _undetermined_chars field.
(struct EquivalenceClass): Remove the fields _undetermined_chars,
_undetermined_chars_length.
(undetermined_equals, undetermined_hashcode): New functions.
(Search::compute_partition): Initialize the _undetermined_chars* fields
of all keywords. Use a hash map instead of a loop over the equivalence
classes.
(Search::find_good_asso_values): Deallocate the _undetermined_chars field.
2025-04-19 Bruno Haible <bruno@clisp.org> 2025-04-19 Bruno Haible <bruno@clisp.org>
Optimize: Make Bool_Array take less memory. Optimize: Make Bool_Array take less memory.

View File

@@ -66,6 +66,7 @@ if test $skip_gnulib = false; then
GNULIB_MODULES=' GNULIB_MODULES='
filename filename
getopt-gnu getopt-gnu
map-c++ hash-map
read-file read-file
package-version package-version
' '

28
lib/.gitignore vendored
View File

@@ -5,12 +5,19 @@
/alloca.in.h /alloca.in.h
/arg-nonnull.h /arg-nonnull.h
/assert.in.h /assert.in.h
/attribute.h
/basename-lgpl.c
/basename-lgpl.h
/c++defs.h /c++defs.h
/cloexec.c /cloexec.c
/cloexec.h /cloexec.h
/close.c /close.c
/dup2.c /dup2.c
/errno.in.h /errno.in.h
/error.c
/error.in.h
/exitfail.c
/exitfail.h
/fcntl.c /fcntl.c
/fcntl.in.h /fcntl.in.h
/fd-hook.c /fd-hook.c
@@ -31,9 +38,22 @@
/getopt.in.h /getopt.in.h
/getopt1.c /getopt1.c
/getopt_int.h /getopt_int.h
/getprogname.c
/getprogname.h
/gettext.h /gettext.h
/gl_anyhash1.h
/gl_anyhash2.h
/gl_anyhash_primes.h
/gl_hash_map.c
/gl_hash_map.h
/gl_map.c
/gl_map.h
/gl_map.hh
/gl_xmap.c
/gl_xmap.h
/idx.h /idx.h
/intprops-internal.h /intprops-internal.h
/intprops.h
/inttypes.in.h /inttypes.in.h
/limits.in.h /limits.in.h
/lseek.c /lseek.c
@@ -51,6 +71,7 @@
/read-file.c /read-file.c
/read-file.h /read-file.h
/realloc.c /realloc.c
/size_max.h
/stat-time.c /stat-time.c
/stat-time.h /stat-time.h
/stat-w32.c /stat-w32.c
@@ -65,6 +86,9 @@
/stdio.in.h /stdio.in.h
/stdlib.c /stdlib.c
/stdlib.in.h /stdlib.in.h
/strerror-override.c
/strerror-override.h
/strerror.c
/string.in.h /string.in.h
/sys_stat.in.h /sys_stat.in.h
/sys_types.in.h /sys_types.in.h
@@ -74,7 +98,11 @@
/verify.h /verify.h
/warn-on-use.h /warn-on-use.h
/wchar.in.h /wchar.in.h
/xalloc-die.c
/xalloc-oversized.h /xalloc-oversized.h
/xalloc.h
/xsize.c
/xsize.h
# Files generated by the autotools: # Files generated by the autotools:
/aclocal.m4 /aclocal.m4

View File

@@ -24,6 +24,8 @@
#ifndef keyword_h #ifndef keyword_h
#define keyword_h 1 #define keyword_h 1
#include <stddef.h> /* defines size_t */
/* Class defined in "positions.h". */ /* Class defined in "positions.h". */
class Positions; class Positions;
@@ -72,6 +74,13 @@ struct KeywordExt : public Keyword
/* Deletes selchars. */ /* Deletes selchars. */
void delete_selchars (); void delete_selchars ();
/* Data members used by the algorithm, specifically compute_partition. */
/* The undetermined selected characters for this keyword, as a
canonically reordered multiset. */
unsigned int * _undetermined_chars;
unsigned int _undetermined_chars_length;
size_t _undetermined_chars_hashcode;
/* Data members used by the algorithm. */ /* Data members used by the algorithm. */
int _hash_value; /* Hash value for the keyword. */ int _hash_value; /* Hash value for the keyword. */

View File

@@ -28,7 +28,9 @@
#include <string.h> /* declares memset(), memcmp() */ #include <string.h> /* declares memset(), memcmp() */
#include <time.h> /* declares time() */ #include <time.h> /* declares time() */
#include <math.h> /* declares exp() */ #include <math.h> /* declares exp() */
#include <limits.h> /* defines INT_MIN, INT_MAX, UINT_MAX */ #include <limits.h> /* defines INT_MIN, INT_MAX, UINT_MAX, CHAR_BIT */
#include "gl_map.hh"
#include "gl_hash_map.h"
#include "options.h" #include "options.h"
#include "hash-table.h" #include "hash-table.h"
@@ -839,6 +841,13 @@ Search::prepare_asso_values ()
/* Memory allocation. */ /* Memory allocation. */
_asso_values = new int[_alpha_size]; _asso_values = new int[_alpha_size];
/* Memory allocation in each Keyword. */
for (temp = _head; temp; temp = temp->rest())
{
KeywordExt *keyword = temp->first();
keyword->_undetermined_chars = new unsigned int[keyword->_selchars_length];
}
int non_linked_length = _list_len; int non_linked_length = _list_len;
unsigned int asso_value_max; unsigned int asso_value_max;
@@ -943,10 +952,6 @@ struct EquivalenceClass
KeywordExt_List * _keywords_last; KeywordExt_List * _keywords_last;
/* The number of keywords in this equivalence class. */ /* The number of keywords in this equivalence class. */
unsigned int _cardinality; unsigned int _cardinality;
/* The undetermined selected characters for the keywords in this
equivalence class, as a canonically reordered multiset. */
unsigned int * _undetermined_chars;
unsigned int _undetermined_chars_length;
EquivalenceClass * _next; EquivalenceClass * _next;
}; };
@@ -984,48 +989,78 @@ equals (const unsigned int *ptr1, const unsigned int *ptr2, unsigned int len)
return true; return true;
} }
/* Equality predicate on keywords, based on their undetermined characters:
   KEY1 and KEY2 match when their _undetermined_chars_length fields agree
   and equals() accepts the two _undetermined_chars arrays over that
   length.  The arrays are not inspected when the lengths differ.  */
static bool
undetermined_equals (KeywordExt *key1, KeywordExt *key2)
{
  const unsigned int len = key1->_undetermined_chars_length;

  /* Different lengths: no need to look at the contents.  */
  if (len != key2->_undetermined_chars_length)
    return false;

  return equals (key1->_undetermined_chars, key2->_undetermined_chars, len);
}
/* Hash function on keywords: hands back the value stored in KEY's
   _undetermined_chars_hashcode field.  Nothing is computed here.  */
static size_t
undetermined_hashcode (KeywordExt *key)
{
  const size_t stored_hashcode = key->_undetermined_chars_hashcode;
  return stored_hashcode;
}
EquivalenceClass * EquivalenceClass *
Search::compute_partition (bool *undetermined) const Search::compute_partition (bool *undetermined) const
{ {
EquivalenceClass *partition = NULL; /* Prepare the use of the hash-map: For each keyword,
EquivalenceClass *partition_last = NULL; compute the undetermined characters and their hash code. */
for (KeywordExt_List *temp = _head; temp; temp = temp->rest()) for (KeywordExt_List *temp = _head; temp; temp = temp->rest())
{ {
KeywordExt *keyword = temp->first(); KeywordExt *keyword = temp->first();
/* Compute the undetermined characters for this keyword. */ /* This scratch memory, an array of length keyword->_selchars_length,
unsigned int *undetermined_chars = was allocated earlier. */
new unsigned int[keyword->_selchars_length]; unsigned int *undetermined_chars = keyword->_undetermined_chars;
unsigned int undetermined_chars_length = 0; unsigned int undetermined_chars_length = 0;
for (int i = 0; i < keyword->_selchars_length; i++) for (int i = 0; i < keyword->_selchars_length; i++)
if (undetermined[keyword->_selchars[i]]) if (undetermined[keyword->_selchars[i]])
undetermined_chars[undetermined_chars_length++] = keyword->_selchars[i]; undetermined_chars[undetermined_chars_length++] = keyword->_selchars[i];
keyword->_undetermined_chars_length = undetermined_chars_length;
{
const int SIZE_BITS = sizeof (size_t) * CHAR_BIT;
size_t h = undetermined_chars_length;
for (unsigned int i = 0; i < undetermined_chars_length; i++)
h = undetermined_chars[i] * 641 + ((h << 9) | (h >> (SIZE_BITS - 9)));
keyword->_undetermined_chars_hashcode = h;
}
}
EquivalenceClass *partition = NULL;
EquivalenceClass *partition_last = NULL;
/* A hash-map that maps each keyword to the EquivalenceClass that contains
it. */
gl_Map<KeywordExt *, EquivalenceClass const *>
map (GL_HASH_MAP, undetermined_equals, undetermined_hashcode, NULL, NULL);
for (KeywordExt_List *temp = _head; temp; temp = temp->rest())
{
KeywordExt *keyword = temp->first();
/* Look up the equivalence class to which this keyword belongs. */ /* Look up the equivalence class to which this keyword belongs. */
EquivalenceClass *equclass; EquivalenceClass *equclass = const_cast<EquivalenceClass *>(map.get(keyword));
for (equclass = partition; equclass; equclass = equclass->_next)
if (equclass->_undetermined_chars_length == undetermined_chars_length
&& equals (equclass->_undetermined_chars, undetermined_chars,
undetermined_chars_length))
break;
if (equclass == NULL) if (equclass == NULL)
{ {
equclass = new EquivalenceClass(); equclass = new EquivalenceClass();
equclass->_keywords = NULL; equclass->_keywords = NULL;
equclass->_keywords_last = NULL; equclass->_keywords_last = NULL;
equclass->_cardinality = 0; equclass->_cardinality = 0;
equclass->_undetermined_chars = undetermined_chars;
equclass->_undetermined_chars_length = undetermined_chars_length;
equclass->_next = NULL; equclass->_next = NULL;
/* Map this keyword (and all equivalent ones that will be seen later)
to equclass. */
map.put(keyword, equclass);
if (partition) if (partition)
partition_last->_next = equclass; partition_last->_next = equclass;
else else
partition = equclass; partition = equclass;
partition_last = equclass; partition_last = equclass;
} }
else
delete[] undetermined_chars;
/* Add the keyword to the equivalence class. */ /* Add the keyword to the equivalence class. */
KeywordExt_List *cons = new KeywordExt_List(keyword); KeywordExt_List *cons = new KeywordExt_List(keyword);
@@ -1037,10 +1072,6 @@ Search::compute_partition (bool *undetermined) const
equclass->_cardinality++; equclass->_cardinality++;
} }
/* Free some of the allocated memory. The caller doesn't need it. */
for (EquivalenceClass *cls = partition; cls; cls = cls->_next)
delete[] cls->_undetermined_chars;
return partition; return partition;
} }
@@ -1052,7 +1083,6 @@ delete_partition (EquivalenceClass *partition)
EquivalenceClass *equclass = partition; EquivalenceClass *equclass = partition;
partition = equclass->_next; partition = equclass->_next;
delete_list (equclass->_keywords); delete_list (equclass->_keywords);
//delete[] equclass->_undetermined_chars; // already freed above
delete equclass; delete equclass;
} }
} }
@@ -1572,6 +1602,13 @@ Search::find_good_asso_values ()
delete[] best_asso_values; delete[] best_asso_values;
/* The keywords' _hash_value fields are recomputed below. */ /* The keywords' _hash_value fields are recomputed below. */
} }
/* Memory deallocation in each Keyword. */
for (KeywordExt_List *temp = _head; temp; temp = temp->rest())
{
KeywordExt *keyword = temp->first();
delete[] keyword->_undetermined_chars;
}
} }
/* ========================================================================= */ /* ========================================================================= */