From b1ff3c70b1370668131049e42817b896bbd3746b Mon Sep 17 00:00:00 2001 From: Bruno Haible Date: Sat, 19 Apr 2025 15:56:43 +0200 Subject: [PATCH] Optimize: Use an array-list instead of a linked-list of keywords. Storing list elements in contiguous memory means: fewer cache misses. This reduces the execution time of gperf on large inputs by ca. 30%. * src/arraylist.h: New file. * src/arraylist.cc: New file. * src/Makefile.in (OBJECTS): Add arraylist.$(OBJEXT). (ARRAYLIST_H): New variable. (arraylist.$(OBJEXT)): New rule. (search.$(OBJEXT)): Update dependencies. (SOURCE_FILES): Add arraylist.cc and arraylist.h. * src/search.cc: Include arraylist.h. (struct EquivalenceClass): Add an ArrayList field. Remove the linked-list fields. Add a constructor. (Search::compute_partition, delete_partition): Update. (Search::count_possible_collisions, Search::unchanged_partition, Search::find_asso_values): Update. --- ChangeLog | 19 ++++++ src/Makefile.in | 21 +++++- src/arraylist.cc | 42 ++++++++++++ src/arraylist.h | 168 +++++++++++++++++++++++++++++++++++++++++++++++ src/search.cc | 45 ++++++------- 5 files changed, 267 insertions(+), 28 deletions(-) create mode 100644 src/arraylist.cc create mode 100644 src/arraylist.h diff --git a/ChangeLog b/ChangeLog index 78cb180..60d5f21 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,22 @@ +2025-04-19 Bruno Haible + + Optimize: Use an array-list instead of a linked-list of keywords. + Storing list elements in contiguous memory means: fewer cache misses. + This reduces the execution time of gperf on large inputs by ca. 30%. + * src/arraylist.h: New file. + * src/arraylist.cc: New file. + * src/Makefile.in (OBJECTS): Add arraylist.$(OBJEXT). + (ARRAYLIST_H): New variable. + (arraylist.$(OBJEXT)): New rule. + (search.$(OBJEXT)): Update dependencies. + (SOURCE_FILES): Add arraylist.cc and arraylist.h. + * src/search.cc: Include arraylist.h. + (struct EquivalenceClass): Add an ArrayList field. Remove the linked-list + fields. Add a constructor. 
+ (Search::compute_partition, delete_partition): Update. + (Search::count_possible_collisions, Search::unchanged_partition, + Search::find_asso_values): Update. + 2025-04-19 Bruno Haible Optimize: Minimize object references in find_asso_values. diff --git a/src/Makefile.in b/src/Makefile.in index 5d816ce..eed0fbb 100644 --- a/src/Makefile.in +++ b/src/Makefile.in @@ -61,8 +61,19 @@ SHELL = /bin/sh VPATH = $(srcdir) -OBJECTS = version.$(OBJEXT) positions.$(OBJEXT) options.$(OBJEXT) keyword.$(OBJEXT) keyword-list.$(OBJEXT) \ - input.$(OBJEXT) bool-array.$(OBJEXT) hash-table.$(OBJEXT) search.$(OBJEXT) output.$(OBJEXT) main.$(OBJEXT) +OBJECTS = \ + version.$(OBJEXT) \ + positions.$(OBJEXT) \ + options.$(OBJEXT) \ + keyword.$(OBJEXT) \ + keyword-list.$(OBJEXT) \ + input.$(OBJEXT) \ + arraylist.$(OBJEXT) \ + bool-array.$(OBJEXT) \ + hash-table.$(OBJEXT) \ + search.$(OBJEXT) \ + output.$(OBJEXT) \ + main.$(OBJEXT) LIBS = ../lib/libgp.a @GPERF_LIBM@ CPPFLAGS = @CPPFLAGS@ \ -I. -I$(srcdir) \ @@ -92,6 +103,7 @@ OPTIONS_H = options.h options.icc $(POSITIONS_H) KEYWORD_H = keyword.h keyword.icc KEYWORD_LIST_H = keyword-list.h keyword-list.icc $(KEYWORD_H) INPUT_H = input.h $(KEYWORD_LIST_H) +ARRAYLIST_H = arraylist.h BOOL_ARRAY_H = bool-array.h bool-array.icc $(OPTIONS_H) HASH_TABLE_H = hash-table.h $(KEYWORD_H) SEARCH_H = search.h $(KEYWORD_LIST_H) $(POSITIONS_H) $(BOOL_ARRAY_H) @@ -109,11 +121,13 @@ keyword-list.$(OBJEXT): keyword-list.cc $(CONFIG_H) $(KEYWORD_LIST_H) $(CXX) $(CXXFLAGS) $(CPPFLAGS) -c $(srcdir)/keyword-list.cc input.$(OBJEXT): input.cc $(CONFIG_H) $(INPUT_H) $(OPTIONS_H) $(CXX) $(CXXFLAGS) $(CPPFLAGS) -c $(srcdir)/input.cc +arraylist.$(OBJEXT): arraylist.cc $(CONFIG_H) $(ARRAYLIST_H) + $(CXX) $(CXXFLAGS) $(CPPFLAGS) -c $(srcdir)/arraylist.cc bool-array.$(OBJEXT): bool-array.cc $(CONFIG_H) $(BOOL_ARRAY_H) $(OPTIONS_H) $(CXX) $(CXXFLAGS) $(CPPFLAGS) -c $(srcdir)/bool-array.cc hash-table.$(OBJEXT): hash-table.cc $(CONFIG_H) $(HASH_TABLE_H) $(OPTIONS_H) $(CXX) 
$(CXXFLAGS) $(CPPFLAGS) -c $(srcdir)/hash-table.cc -search.$(OBJEXT): search.cc $(CONFIG_H) $(SEARCH_H) $(OPTIONS_H) $(HASH_TABLE_H) +search.$(OBJEXT): search.cc $(CONFIG_H) $(SEARCH_H) $(OPTIONS_H) $(HASH_TABLE_H) $(ARRAYLIST_H) $(CXX) $(CXXFLAGS) $(CPPFLAGS) -c $(srcdir)/search.cc output.$(OBJEXT): output.cc $(CONFIG_H) $(OUTPUT_H) $(OPTIONS_H) $(VERSION_H) $(CXX) $(CXXFLAGS) $(CPPFLAGS) -c $(srcdir)/output.cc @@ -151,6 +165,7 @@ SOURCE_FILES = \ keyword.cc $(KEYWORD_H) \ keyword-list.cc $(KEYWORD_LIST_H) \ input.cc $(INPUT_H) \ + arraylist.cc $(ARRAYLIST_H) \ bool-array.cc $(BOOL_ARRAY_H) \ hash-table.cc $(HASH_TABLE_H) \ search.cc $(SEARCH_H) \ diff --git a/src/arraylist.cc b/src/arraylist.cc new file mode 100644 index 0000000..e23d7d7 --- /dev/null +++ b/src/arraylist.cc @@ -0,0 +1,42 @@ +/* This may look like C code, but it is really -*- C++ -*- */ + +/* Array-list container. + + Copyright (C) 2025 Free Software Foundation, Inc. + Written by Bruno Haible . + + This file is part of GNU GPERF. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +#include + +/* Specification. 
*/ +#include "arraylist.h" + +#include + +void ArrayListRepresentation::ensure_capacity (size_t n, size_t size_of_T) +{ + size_t new_max = 2 * _nitems_max + 1; + if (new_max < n) + new_max = n; + void *new_item = realloc (_item, new_max * size_of_T); + if (new_item == NULL) + throw std::bad_alloc(); + /* The realloc() call has moved the elements from the old storage to the + new storage. The old storage is thus now considered uninitialized. */ + _item = new_item; + _nitems_max = new_max; +} diff --git a/src/arraylist.h b/src/arraylist.h new file mode 100644 index 0000000..9b287e3 --- /dev/null +++ b/src/arraylist.h @@ -0,0 +1,168 @@ +/* This may look like C code, but it is really -*- C++ -*- */ + +/* Array-list container. + + Copyright (C) 2025 Free Software Foundation, Inc. + Written by Bruno Haible . + + This file is part of GNU GPERF. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +#ifndef arraylist_h +#define arraylist_h 1 + +#include /* defines size_t, malloc(), realloc(), free(), abort() */ +#include +#if __cplusplus >= 201103L +# include +#endif + +/* ArrayList is a list of elements of type T, stored contiguously in memory + (in order to make good use of memory caches in a CPU). + T must be a type whose contents may be freely moved in memory (i.e. without + fields that contain backpointers to the element itself). + It is like std::vector, but we don't want the bloated C++ standard + library. 
+ It is like gnulib's "gl_list.hh" with the GL_ARRAY_LIST implementation. + But here we don't want polymorphic containers (that force function calls at + runtime); instead we want optimal inlining. */ + +/* In order to avoid the need for explicit instantiation for each possible + template parameter, we make the class ArrayList entirely inline. + The class ArrayListRepresentation is used to attach methods which we want + to be compiled only once, avoiding duplicated code for each possible + template parameter. */ + +class ArrayListRepresentation +{ + template + friend class ArrayList; +public: + // ------------------------------ Constructors ------------------------------ + + ArrayListRepresentation () + { + _item = NULL; + _nitems = 0; + _nitems_max = 0; + } + + // ------------------------------ Private stuff ------------------------------ + +private: + /* Vector of entries. */ + void * _item; + /* Number of elements of the vector that are used. */ + size_t _nitems; + /* Number of elements of the vector that can be used at most. */ + size_t _nitems_max; + + /* Ensures that _nitems_max >= n. + To be called only when _nitems_max < n. */ + void ensure_capacity (size_t n, size_t size_of_T); +}; + +template + class ArrayList + { + public: + // ----------------------------- Constructors ----------------------------- + + /* Creates a new ArrayList with 0 elements, + with no storage allocated initially. */ + ArrayList () + : _rep () {} + + // ------------------------------ Destructor ------------------------------ + + ~ArrayList () + { + if (_rep._item != NULL) + { + /* Destruct the elements (in the opposite order of their + initialization). */ + #if __cplusplus >= 201103L + /* See . */ + if (! std::is_trivially_destructible::value) + #endif + { + size_t index = _rep._nitems; + while (index > 0) + { + --index; + (static_cast(_rep._item))[index].~T (); + } + } + /* Free the storage. 
*/ + free (_rep._item); + } + } + + // ---------------------- Read-only member functions ---------------------- + + /* Returns the current number of elements in the list. */ + size_t size () const + { + return _rep._nitems; + } + + T& get_at (size_t index) const + { + #if !__OPTIMIZE__ + if (index >= _rep._nitems) + /* index out of range. */ + abort (); + #endif + return (static_cast(_rep._item))[index]; + } + + // ---------------------- Modifying member functions ---------------------- + + void set_at (size_t index, const T& value) + { + #if !__OPTIMIZE__ + if (index >= _rep._nitems) + /* index out of range. */ + abort (); + #endif + (static_cast(_rep._item))[index] = value; + } + + size_t add_last (const T& value) + { + if (_rep._nitems == _rep._nitems_max) + ensure_capacity (_rep._nitems + 1); + size_t index = _rep._nitems; + new (&(static_cast(_rep._item))[index]) T (value); + _rep._nitems++; + return index; + } + + // ----------------------------- Private stuff ----------------------------- + + private: + /* Here, the vector of entries is a 'T *', but only the first _nitems + elements are initialized. The remaining memory is uninitialized. */ + ArrayListRepresentation _rep; + + /* Ensures that _nitems_max >= n. + To be called only when _nitems_max < n. */ + void ensure_capacity (size_t n) + { + _rep.ensure_capacity (n, sizeof (T)); + } + }; + +#endif diff --git a/src/search.cc b/src/search.cc index f6e83db..d7116f6 100644 --- a/src/search.cc +++ b/src/search.cc @@ -32,6 +32,7 @@ #include "gl_map.hh" #include "gl_hash_map.h" #include "options.h" +#include "arraylist.h" #include "hash-table.h" /* ============================== Portability ============================== */ @@ -948,12 +949,12 @@ Search::prepare_asso_values () struct EquivalenceClass { /* The keywords in this equivalence class. */ - KeywordExt_List * _keywords; - KeywordExt_List * _keywords_last; - /* The number of keywords in this equivalence class. 
*/ - unsigned int _cardinality; + ArrayList _keywords; EquivalenceClass * _next; + + /* Constructor. */ + EquivalenceClass () : _keywords () {} }; struct Step @@ -1046,9 +1047,6 @@ Search::compute_partition (bool *undetermined) const if (equclass == NULL) { equclass = new EquivalenceClass(); - equclass->_keywords = NULL; - equclass->_keywords_last = NULL; - equclass->_cardinality = 0; equclass->_next = NULL; /* Map this keyword (and all equivalent ones that will be seen later) @@ -1063,13 +1061,7 @@ Search::compute_partition (bool *undetermined) const } /* Add the keyword to the equivalence class. */ - KeywordExt_List *cons = new KeywordExt_List(keyword); - if (equclass->_keywords) - equclass->_keywords_last->rest() = cons; - else - equclass->_keywords = cons; - equclass->_keywords_last = cons; - equclass->_cardinality++; + equclass->_keywords.add_last (keyword); } return partition; @@ -1082,7 +1074,6 @@ delete_partition (EquivalenceClass *partition) { EquivalenceClass *equclass = partition; partition = equclass->_next; - delete_list (equclass->_keywords); delete equclass; } } @@ -1104,9 +1095,10 @@ Search::count_possible_collisions (EquivalenceClass *partition, unsigned int c) for (unsigned int i = 0; i <= m; i++) split_cardinalities[i] = 0; - for (KeywordExt_List *temp = cls->_keywords; temp; temp = temp->rest()) + size_t cls_size = cls->_keywords.size(); + for (size_t index = 0; index < cls_size; index++) { - KeywordExt *keyword = temp->first(); + KeywordExt *keyword = cls->_keywords.get_at(index); unsigned int count = 0; for (int i = 0; i < keyword->_selchars_length; i++) @@ -1116,7 +1108,7 @@ Search::count_possible_collisions (EquivalenceClass *partition, unsigned int c) split_cardinalities[count]++; } - sum += cls->_cardinality * cls->_cardinality; + sum += cls->_keywords.size() * cls->_keywords.size(); for (unsigned int i = 0; i <= m; i++) sum -= split_cardinalities[i] * split_cardinalities[i]; } @@ -1133,16 +1125,17 @@ Search::unchanged_partition 
(EquivalenceClass *partition, unsigned int c) const { unsigned int first_count = UINT_MAX; - for (KeywordExt_List *temp = cls->_keywords; temp; temp = temp->rest()) + size_t cls_size = cls->_keywords.size(); + for (size_t index = 0; index < cls_size; index++) { - KeywordExt *keyword = temp->first(); + KeywordExt *keyword = cls->_keywords.get_at(index); unsigned int count = 0; for (int i = 0; i < keyword->_selchars_length; i++) if (keyword->_selchars[i] == c) count++; - if (temp == cls->_keywords) + if (index == 0) first_count = count; else if (count != first_count) /* c would split this equivalence class. */ @@ -1295,9 +1288,10 @@ Search::find_asso_values () for (EquivalenceClass *cls = step->_partition; cls; cls = cls->_next) { fprintf (stderr, "\n"); - for (KeywordExt_List *temp = cls->_keywords; temp; temp = temp->rest()) + size_t cls_size = cls->_keywords.size(); + for (size_t index = 0; index < cls_size; index++) { - KeywordExt *keyword = temp->first(); + KeywordExt *keyword = cls->_keywords.get_at(index); fprintf (stderr, " %.*s\n", keyword->_allchars_length, keyword->_allchars); } @@ -1347,9 +1341,10 @@ Search::find_asso_values () /* Iteration Number array is a win, O(1) initialization time! */ _collision_detector->clear (); - for (KeywordExt_List *ptr = cls->_keywords; ptr; ptr = ptr->rest()) + size_t cls_size = cls->_keywords.size(); + for (size_t index = 0; index < cls_size; index++) { - KeywordExt *keyword = ptr->first(); + KeywordExt *keyword = cls->_keywords.get_at(index); /* Compute the new hash code for the keyword, leaving apart the yet undetermined asso_values[]. */