New option --ignore-case.

2025-12-02 13:09:22 +00:00 · 2003-04-02 09:26:05 +00:00
parent 0093e33163
commit 7dfd32b736
15 changed files with 583 additions and 39 deletions
--- a/44
+++ b/44
@@ -1,5 +1,49 @@
 2002-12-10  Bruno Haible  <bruno@clisp.org>
 	* src/options.h (UPPERLOWER): New enum value.
 	* src/options.cc (Options::long_usage): Document option --ignore-case.
 	(Options::~Options): Update.
 	(long_options): Add option --ignore-case.
 	(Options::parse_options): Handle option --ignore-case.
 	* src/input.cc (Input::read_input): Recognize option %ignore-case.
 	* src/keyword.h (KeywordExt::init_selchars_tuple,
 	KeywordExt::init_selchars_multiset, KeywordExt::init_selchars_low):
 	Add alpha_unify argument.
 	* src/keyword.cc (KeywordExt::init_selchars_low): Add alpha_unify
 	argument.
 	(KeywordExt::init_selchars_tuple): Add alpha_unify argument.
 	(KeywordExt::init_selchars_multiset): Add alpha_unify argument.
 	* src/search.h (Search::compute_alpha_size,
 	Search::compute_alpha_unify): New declarations.
 	(Search::init_selchars_multiset): Add alpha_unify argument.
 	(Search::_alpha_unify): New field.
 	* src/search.cc (Search::compute_alpha_size,
 	Search::compute_alpha_unify): New functions.
 	(Search::init_selchars_tuple): Update.
 	(Search::find_positions): Temporarily set _alpha_unify. Perform a
 	case insensitive comparison if needed.
 	(Search::init_selchars_multiset): Add alpha_unify argument.
 	(Search::count_duplicates_multiset): Call compute_alpha_unify.
 	(Search::find_alpha_inc): Temporarily set _alpha_unify. At the end,
 	set _alpha_size and _alpha_unify.
 	(Search::prepare): Update. Don't compute _alpga_size here.
 	(Search::optimize): Propagate unified asso_values.
 	(Search::~Search) Delete _alpha_unify.
 	* src/output.cc (output_upperlower_strcmp, output_upperlower_strncmp,
 	output_upperlower_memcmp): New functions.
 	(Output_Compare_Strcmp::output_comparison,
 	Output_Compare_Strncmp::output_comparison,
 	Output_Compare_Memcmp::output_comparison): Use the case-insensitive
 	comparison function if --ignore-case was given.
 	(Output::output): Emit the auxiliary case-insensitive comparison
 	function if needed.
 	* tests/permutc2.gperf, tests/permutc2.exp: New files.
 	* tests/Makefile.in (check-test): Also check permutc2.gperf.
 	* tests/test-6.exp: Update.
 	* doc/gperf.texi (Gperf Declarations): Document %ignore-case.
 	(Input Details): Document option --ignore-case.
 	* NEWS: Update.
 	* src/search.cc (Search::optimize): Fill unused asso_values[] entries
 	with a large value.
 	* src/output.h (Output::Output): Remove occurrences argument.
--- a/2
+++ b/2
@@ -10,6 +10,7 @@ New in 2.97:
 * The following options can now be specified inside the input file:
    %delimiters=DELIMITER-LIST
    %struct-type
    %ignore-case
    %language=LANGUAGE-NAME
    %define slot-name NAME
    %define hash-function-name NAME
@@ -42,6 +43,7 @@ New in 2.97:
 * The options -f/--fast and -o/--occurrence-sort have no effect any more.
 * Added option -P/--pic that optimizes the generated code for use in shared
  libraries.
 * Added option --ignore-case that produces a case independent lookup function.
 * Bug fixes.
 New in 2.7.2:
--- a/doc/gperf.texi
+++ b/doc/gperf.texi
@@ -436,6 +436,12 @@ commas or newlines.
 Allows you to include a @code{struct} type declaration for generated
 code; see above for an example.
@item %ignore-case
@cindex @samp{%ignore-case}
 Consider upper and lower case ASCII characters as equivalent.  The string
 comparison will use a case insignificant character comparison.  Note that
 locale dependent case mappings are ignored.
@item %language=@var{language-name}
@cindex @samp{%language}
 Instructs @code{gperf} to generate code in the language specified by the
@@ -790,6 +796,17 @@ part of the type declaration.  Keywords and additional fields may follow
 this, one group of fields per line.  A set of examples for generating
 perfect hash tables and functions for Ada, C, C++, Pascal, Modula 2,
 Modula 3 and JavaScript reserved words are distributed with this release.
@item --ignore-case
 Consider upper and lower case ASCII characters as equivalent.  The string
 comparison will use a case insignificant character comparison.  Note that
 locale dependent case mappings are ignored.  This option is therefore not
 suitable if a properly internationalized or locale aware case mapping
 should be used.  (For example, in a Turkish locale, the upper case equivalent
 of the lowercase ASCII letter @samp{i} is the non-ASCII character
@samp{capital i with dot above}.)  For this case, it is better to apply
 an uppercase or lowercase conversion on the string before passing it to
 the @code{gperf} generated function.
@end table
@node Output Language, Output Details, Input Details, Options
--- a/src/input.cc
+++ b/src/input.cc
@@ -472,6 +472,10 @@ Input::read_input ()
                  option.set (TYPE);
                else
                if (is_declaration (line, line_end, lineno, "ignore-case"))
                  option.set (UPPERLOWER);
                else
                if (is_declaration_with_arg (line, line_end, lineno,
                                             "language", &arg))
                  option.set_language (arg);
--- a/src/keyword.cc
+++ b/src/keyword.cc
@@ -59,7 +59,7 @@ static inline void sort_char_set (unsigned int *base, int len)
 */
 unsigned int *
-KeywordExt::init_selchars_low (bool use_all_chars, const Positions& positions, const unsigned int *alpha_inc)
+KeywordExt::init_selchars_low (bool use_all_chars, const Positions& positions, const unsigned int *alpha_unify, const unsigned int *alpha_inc)
 {
  const char *k = _allchars;
  unsigned int *key_set =
@@ -73,6 +73,8 @@ KeywordExt::init_selchars_low (bool use_all_chars, const Positions& positions, c
        unsigned int c = static_cast<unsigned char>(*k);
        if (alpha_inc)
          c += alpha_inc[k-_allchars];
        if (alpha_unify)
          c = alpha_unify[c];
        *ptr = c;
        ptr++;
      }
@@ -99,6 +101,8 @@ KeywordExt::init_selchars_low (bool use_all_chars, const Positions& positions, c
          else
            /* Out of range of KEY length, so we'll just skip it.  */
            continue;
          if (alpha_unify)
            c = alpha_unify[c];
          *ptr = c;
          ptr++;
        }
@@ -111,16 +115,16 @@ KeywordExt::init_selchars_low (bool use_all_chars, const Positions& positions, c
 }
 void
-KeywordExt::init_selchars_tuple (bool use_all_chars, const Positions& positions)
+KeywordExt::init_selchars_tuple (bool use_all_chars, const Positions& positions, const unsigned int *alpha_unify)
 {
-  init_selchars_low (use_all_chars, positions, NULL);
+  init_selchars_low (use_all_chars, positions, alpha_unify, NULL);
 }
 void
-KeywordExt::init_selchars_multiset (bool use_all_chars, const Positions& positions, const unsigned int *alpha_inc)
+KeywordExt::init_selchars_multiset (bool use_all_chars, const Positions& positions, const unsigned int *alpha_unify, const unsigned int *alpha_inc)
 {
  unsigned int *selchars =
-    init_selchars_low (use_all_chars, positions, alpha_inc);
+    init_selchars_low (use_all_chars, positions, alpha_unify, alpha_inc);
  /* Sort the selchars elements alphabetically.  */
  sort_char_set (selchars, _selchars_length);
--- a/src/keyword.h
+++ b/src/keyword.h
@@ -68,9 +68,9 @@ struct KeywordExt : public Keyword
  /* Methods depending on the keyposition list.  */
  /* Initializes selchars and selchars_length, without reordering.  */
-  void                  init_selchars_tuple (bool use_all_chars, const Positions& positions);
+  void                  init_selchars_tuple (bool use_all_chars, const Positions& positions, const unsigned int *alpha_unify);
  /* Initializes selchars and selchars_length, with reordering.  */
-  void                  init_selchars_multiset (bool use_all_chars, const Positions& positions, const unsigned int *alpha_inc);
+  void                  init_selchars_multiset (bool use_all_chars, const Positions& positions, const unsigned int *alpha_unify, const unsigned int *alpha_inc);
  /* Deletes selchars.  */
  void                  delete_selchars ();
@@ -81,7 +81,7 @@ struct KeywordExt : public Keyword
  int                   _final_index;
 private:
-  unsigned int *        init_selchars_low (bool use_all_chars, const Positions& positions, const unsigned int *alpha_inc);
+  unsigned int *        init_selchars_low (bool use_all_chars, const Positions& positions, const unsigned int *alpha_unify, const unsigned int *alpha_inc);
 };
 /* An abstract factory for creating Keyword instances.
--- a/src/options.cc
+++ b/src/options.cc
@@ -106,6 +106,10 @@ Options::long_usage (FILE * stream) const
           "                         is considered part of the type declaration. Key\n"
           "                         words and additional fields may follow this, one\n"
           "                         group of fields per line.\n");
  fprintf (stream,
           "      --ignore-case      Consider upper and lower case ASCII characters as\n"
           "                         equivalent. Note that locale dependent case mappings\n"
           "                         are ignored.\n");
  fprintf (stream, "\n");
  fprintf (stream,
           "Language for the output code:\n");
@@ -463,6 +467,7 @@ Options::~Options ()
               "\nENUM is........: %s"
               "\nINCLUDE is.....: %s"
               "\nSEVENBIT is....: %s"
               "\nUPPERLOWER is..: %s"
               "\nlookup function name = %s"
               "\nhash function name = %s"
               "\nword list name = %s"
@@ -492,6 +497,7 @@ Options::~Options ()
               _option_word & ENUM ? "enabled" : "disabled",
               _option_word & INCLUDE ? "enabled" : "disabled",
               _option_word & SEVENBIT ? "enabled" : "disabled",
               _option_word & UPPERLOWER ? "enabled" : "disabled",
               _function_name, _hash_name, _wordlist_name, _slot_name,
               _initializer_suffix, _asso_iterations, _jump, _size_multiple,
               _initial_asso_value, _delimiters, _total_switches);
@@ -605,6 +611,7 @@ Options::set_delimiters (const char *delimiters)
 static const struct option long_options[] =
 {
  { "output-file", required_argument, NULL, CHAR_MAX + 1 },
  { "ignore-case", no_argument, NULL, CHAR_MAX + 2 },
  { "delimiters", required_argument, NULL, 'e' },
  { "struct-type", no_argument, NULL, 't' },
  { "language", required_argument, NULL, 'L' },
@@ -949,6 +956,11 @@ warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n\
            _output_file_name = /*getopt*/optarg;
            break;
          }
        case CHAR_MAX + 2:      /* Case insignificant.  */
          {
            _option_word |= UPPERLOWER;
            break;
          }
        default:
          short_usage (stderr);
          exit (1);
--- a/src/options.h
+++ b/src/options.h
@@ -98,7 +98,10 @@ enum Option_Type
  SEVENBIT     = 1 << 19,
  /* Optimize for position-independent code.  */
-  SHAREDLIB    = 1 << 20
+  SHAREDLIB    = 1 << 20,
  /* Ignore case of ASCII characters.  */
  UPPERLOWER   = 1 << 21
 };
 /* Class manager for gperf program Options. */
--- a/src/output.cc
+++ b/src/output.cc
@@ -232,6 +232,131 @@ Output::output_constants (struct Output_Constants& style) const
 /* ------------------------------------------------------------------------- */
 /* Output gperf's ASCII-case insensitive strcmp replacement.  */
 static void
 output_upperlower_strcmp ()
 {
  printf ("#ifndef GPERF_CASE_STRCMP\n"
          "#define GPERF_CASE_STRCMP 1\n"
          "static int\n"
          "gperf_case_strcmp ");
  printf (option[KRC] ?
               "(s1, s2)\n"
          "     register char *s1;\n"
          "     register char *s2;\n" :
          option[C] ?
               "(s1, s2)\n"
          "     register const char *s1;\n"
          "     register const char *s2;\n" :
          option[ANSIC] | option[CPLUSPLUS] ?
               "(register const char *s1, register const char *s2)\n" :
          "");
  printf ("{\n"
          "  for (;;)\n"
          "    {\n"
          "      unsigned char c1 = *s1++;\n"
          "      unsigned char c2 = *s2++;\n"
          "      if (c1 >= 'A' && c1 <= 'Z')\n"
          "        c1 += 'a' - 'A';\n"
          "      if (c2 >= 'A' && c2 <= 'Z')\n"
          "        c2 += 'a' - 'A';\n"
          "      if (c1 != 0 && c1 == c2)\n"
          "        continue;\n"
          "      return (int)c1 - (int)c2;\n"
          "    }\n"
          "}\n"
          "#endif\n\n");
 }
 /* Output gperf's ASCII-case insensitive strncmp replacement.  */
 static void
 output_upperlower_strncmp ()
 {
  printf ("#ifndef GPERF_CASE_STRNCMP\n"
          "#define GPERF_CASE_STRNCMP 1\n"
          "static int\n"
          "gperf_case_strncmp ");
  printf (option[KRC] ?
               "(s1, s2, n)\n"
          "     register char *s1;\n"
          "     register char *s2;\n"
          "     register unsigned int n;\n" :
          option[C] ?
               "(s1, s2, n)\n"
          "     register const char *s1;\n"
          "     register const char *s2;\n"
          "     register unsigned int n;\n" :
          option[ANSIC] | option[CPLUSPLUS] ?
               "(register const char *s1, register const char *s2, register unsigned int n)\n" :
          "");
  printf ("{\n"
          "  for (; n > 0;)\n"
          "    {\n"
          "      unsigned char c1 = *s1++;\n"
          "      unsigned char c2 = *s2++;\n"
          "      if (c1 >= 'A' && c1 <= 'Z')\n"
          "        c1 += 'a' - 'A';\n"
          "      if (c2 >= 'A' && c2 <= 'Z')\n"
          "        c2 += 'a' - 'A';\n"
          "      if (c1 != 0 && c1 == c2)\n"
          "        {\n"
          "          n--;\n"
          "          continue;\n"
          "        }\n"
          "      return (int)c1 - (int)c2;\n"
          "    }\n"
          "  return 0;\n"
          "}\n"
          "#endif\n\n");
 }
 /* Output gperf's ASCII-case insensitive memcmp replacement.  */
 static void
 output_upperlower_memcmp ()
 {
  printf ("#ifndef GPERF_CASE_MEMCMP\n"
          "#define GPERF_CASE_MEMCMP 1\n"
          "static int\n"
          "gperf_case_memcmp ");
  printf (option[KRC] ?
               "(s1, s2, n)\n"
          "     register char *s1;\n"
          "     register char *s2;\n"
          "     register unsigned int n;\n" :
          option[C] ?
               "(s1, s2, n)\n"
          "     register const char *s1;\n"
          "     register const char *s2;\n"
          "     register unsigned int n;\n" :
          option[ANSIC] | option[CPLUSPLUS] ?
               "(register const char *s1, register const char *s2, register unsigned int n)\n" :
          "");
  printf ("{\n"
          "  for (; n > 0;)\n"
          "    {\n"
          "      unsigned char c1 = *s1++;\n"
          "      unsigned char c2 = *s2++;\n"
          "      if (c1 >= 'A' && c1 <= 'Z')\n"
          "        c1 += 'a' - 'A';\n"
          "      if (c2 >= 'A' && c2 <= 'Z')\n"
          "        c2 += 'a' - 'A';\n"
          "      if (c1 == c2)\n"
          "        {\n"
          "          n--;\n"
          "          continue;\n"
          "        }\n"
          "      return (int)c1 - (int)c2;\n"
          "    }\n"
          "  return 0;\n"
          "}\n"
          "#endif\n\n");
 }
 /* ------------------------------------------------------------------------- */
 /* Outputs a keyword, as a string: enclosed in double quotes, escaping
   backslashes, double quote and unprintable characters.  */
@@ -363,7 +488,10 @@ void Output_Compare_Strcmp::output_comparison (const Output_Expr& expr1,
  expr1.output_expr ();
  printf (" == *");
  expr2.output_expr ();
-  printf (" && !strcmp (");
+  printf (" && !");
  if (option[UPPERLOWER])
    printf ("gperf_case_");
  printf ("strcmp (");
  expr1.output_expr ();
  printf (" + 1, ");
  expr2.output_expr ();
@@ -389,7 +517,10 @@ void Output_Compare_Strncmp::output_comparison (const Output_Expr& expr1,
  expr1.output_expr ();
  printf (" == *");
  expr2.output_expr ();
-  printf (" && !strncmp (");
+  printf (" && !");
  if (option[UPPERLOWER])
    printf ("gperf_case_");
  printf ("strncmp (");
  expr1.output_expr ();
  printf (" + 1, ");
  expr2.output_expr ();
@@ -418,7 +549,10 @@ void Output_Compare_Memcmp::output_comparison (const Output_Expr& expr1,
  expr1.output_expr ();
  printf (" == *");
  expr2.output_expr ();
-  printf (" && !memcmp (");
+  printf (" && !");
  if (option[UPPERLOWER])
    printf ("gperf_case_");
  printf ("memcmp (");
  expr1.output_expr ();
  printf (" + 1, ");
  expr2.output_expr ();
@@ -1522,6 +1656,19 @@ Output::output ()
  printf ("/* maximum key range = %d, duplicates = %d */\n\n",
          _max_hash_value - _min_hash_value + 1, _total_duplicates);
  if (option[UPPERLOWER])
    {
      if (option[LENTABLE])
        output_upperlower_memcmp ();
      else
        {
          if (option[COMP])
            output_upperlower_strncmp ();
          else
            output_upperlower_strcmp ();
        }
    }
  if (option[CPLUSPLUS])
    printf ("class %s\n"
            "{\n"
--- a/src/search.cc
+++ b/src/search.cc
@@ -146,12 +146,39 @@ Search::preprepare ()
 /* ====================== Finding good byte positions ====================== */
 /* Computes the upper bound on the indices passed to asso_values[],
   assuming no alpha_increments.  */
 unsigned int
 Search::compute_alpha_size () const
 {
  return (option[SEVENBIT] ? 128 : 256);
 }
 /* Computes the unification rules between different asso_values[c],
   assuming no alpha_increments.  */
 unsigned int *
 Search::compute_alpha_unify () const
 {
  if (option[UPPERLOWER])
    {
      unsigned int alpha_size = compute_alpha_size();
      unsigned int *alpha_unify = new unsigned int[alpha_size];
      for (unsigned int c = 0; c < alpha_size; c++)
        alpha_unify[c] = c;
      for (unsigned int c = 'A'; c <= 'Z'; c++)
        alpha_unify[c] = c + ('a'-'A');
      return alpha_unify;
    }
  else
    return NULL;
 }
 /* Initializes each keyword's _selchars array.  */
 void
 Search::init_selchars_tuple (bool use_all_chars, const Positions& positions) const
 {
  for (KeywordExt_List *temp = _head; temp; temp = temp->rest())
-    temp->first()->init_selchars_tuple(use_all_chars, positions);
+    temp->first()->init_selchars_tuple(use_all_chars, positions, _alpha_unify);
 }
 /* Deletes each keyword's _selchars array.  */
@@ -202,6 +229,9 @@ Search::find_positions ()
      return;
    }
  /* Compute preliminary value for _alpha_unify.  */
  _alpha_unify = compute_alpha_unify ();
  /* 1. Find positions that must occur in order to distinguish duplicates.  */
  Positions mandatory;
@@ -222,17 +252,42 @@ Search::find_positions ()
                  int n = keyword1->_allchars_length;
                  int i;
                  for (i = 1; i < n; i++)
                    if (keyword1->_allchars[i-1] != keyword2->_allchars[i-1])
                      break;
                  if (i < n
                      && memcmp (&keyword1->_allchars[i],
                                 &keyword2->_allchars[i],
                                 n - i)
                         == 0)
                    {
-                      /* Position i is mandatory.  */
+                      unsigned char c1 = keyword1->_allchars[i-1];
-                      if (!mandatory.contains (i))
+                      unsigned char c2 = keyword2->_allchars[i-1];
-                        mandatory.add (i);
+                      if (option[UPPERLOWER])
                        {
                          if (c1 >= 'A' && c1 <= 'Z')
                            c1 += 'a' - 'A';
                          if (c2 >= 'A' && c2 <= 'Z')
                            c2 += 'a' - 'A';
                        }
                      if (c1 != c2)
                        break;
                    }
                  if (i < n)
                    {
                      int j;
                      for (j = i + 1; j <= n; j++)
                        {
                          unsigned char c1 = keyword1->_allchars[j-1];
                          unsigned char c2 = keyword2->_allchars[j-1];
                          if (option[UPPERLOWER])
                            {
                              if (c1 >= 'A' && c1 <= 'Z')
                                c1 += 'a' - 'A';
                              if (c2 >= 'A' && c2 <= 'Z')
                                c2 += 'a' - 'A';
                            }
                          if (c1 != c2)
                            break;
                        }
                      if (j > n)
                        {
                          /* Position i is mandatory.  */
                          if (!mandatory.contains (i))
                            mandatory.add (i);
                        }
                    }
                }
            }
@@ -379,16 +434,113 @@ Search::find_positions ()
        }
      fprintf (stderr, "\n");
    }
  /* Free preliminary value for _alpha_unify.  */
  delete[] _alpha_unify;
 }
 /* ===================== Finding good alpha increments ===================== */
 /* Computes the upper bound on the indices passed to asso_values[].  */
 unsigned int
 Search::compute_alpha_size (const unsigned int *alpha_inc) const
 {
  unsigned int max_alpha_inc = 0;
  for (int i = 0; i < _max_key_len; i++)
    if (max_alpha_inc < alpha_inc[i])
      max_alpha_inc = alpha_inc[i];
  return (option[SEVENBIT] ? 128 : 256) + max_alpha_inc;
 }
 /* Computes the unification rules between different asso_values[c].  */
 unsigned int *
 Search::compute_alpha_unify (const Positions& positions, const unsigned int *alpha_inc) const
 {
  if (option[UPPERLOWER])
    {
      /* Without alpha increments, we would simply unify
           'A' -> 'a', ..., 'Z' -> 'z'.
         But when a keyword contains at position i a character c,
         we have the constraint
            asso_values[tolower(c) + alpha_inc[i]] ==
            asso_values[toupper(c) + alpha_inc[i]].
         This introduces a unification
           toupper(c) + alpha_inc[i] -> tolower(c) + alpha_inc[i].
         Note that this unification can extend outside the range of
         ASCII letters!  But still every unified character pair is at
         a distance of 'a'-'A' = 32, or (after chained unification)
         at a multiple of 32.  So in the end the alpha_unify vector has
         the form    c -> c + 32 * f(c)   where f(c) is a nonnegative
         integer.  */
      unsigned int alpha_size = compute_alpha_size (alpha_inc);
      unsigned int *alpha_unify = new unsigned int[alpha_size];
      for (unsigned int c = 0; c < alpha_size; c++)
        alpha_unify[c] = c;
      for (KeywordExt_List *temp = _head; temp; temp = temp->rest())
        {
          KeywordExt *keyword = temp->first();
          if (option[ALLCHARS])
            /* Iterate through all character positions.  */
            for (int i = 0; i < keyword->_allchars_length; i++)
              {
                unsigned int c = static_cast<unsigned char>(keyword->_allchars[i]);
                if (c >= 'A' && c <= 'Z')
                  c += 'a' - 'A';
                if (c >= 'a' && c <= 'z')
                  {
                    c += alpha_inc[i];
                    /* Unify c with c - ('a'-'A').  */
                    unsigned int d = alpha_unify[c];
                    unsigned int b = c - ('a'-'A');
                    for (int a = b; a >= 0 && alpha_unify[a] == b; a -= ('a'-'A'))
                      alpha_unify[a] = d;
                  }
              }
          else
            {
              /* Iterate through the selected character positions.  */
              PositionIterator iter (positions);
              for (int i; (i = iter.next ()) != PositionIterator::EOS; )
                {
                  unsigned int c;
                  if (i == Positions::LASTCHAR)
                    c = static_cast<unsigned char>(keyword->_allchars[keyword->_allchars_length - 1]);
                  else if (i <= keyword->_allchars_length)
                    c = static_cast<unsigned char>(keyword->_allchars[i - 1]);
                  else
                    continue;
                  if (c >= 'A' && c <= 'Z')
                    c += 'a' - 'A';
                  if (c >= 'a' && c <= 'z')
                    {
                      if (i != Positions::LASTCHAR)
                        c += alpha_inc[i - 1];
                      /* Unify c with c - ('a'-'A').  */
                      unsigned int d = alpha_unify[c];
                      unsigned int b = c - ('a'-'A');
                      for (int a = b; a >= 0 && alpha_unify[a] == b; a -= ('a'-'A'))
                        alpha_unify[a] = d;
                    }
                }
            }
        }
      return alpha_unify;
    }
  else
    /* Identity mapping.  */
    return NULL;
 }
 /* Initializes each keyword's _selchars array.  */
 void
-Search::init_selchars_multiset (bool use_all_chars, const Positions& positions, const unsigned int *alpha_inc) const
+Search::init_selchars_multiset (bool use_all_chars, const Positions& positions, const unsigned int *alpha_unify, const unsigned int *alpha_inc) const
 {
  for (KeywordExt_List *temp = _head; temp; temp = temp->rest())
-    temp->first()->init_selchars_multiset(use_all_chars, positions, alpha_inc);
+    temp->first()->init_selchars_multiset(use_all_chars, positions, alpha_unify, alpha_inc);
 }
 /* Count the duplicate keywords that occur with the given set of positions
@@ -402,7 +554,9 @@ Search::count_duplicates_multiset (const unsigned int *alpha_inc) const
  /* Run through the keyword list and count the duplicates incrementally.
     The result does not depend on the order of the keyword list, thanks to
     the formula above.  */
-  init_selchars_multiset (option[ALLCHARS], _key_positions, alpha_inc);
+  init_selchars_multiset (option[ALLCHARS], _key_positions,
                          compute_alpha_unify (_key_positions, alpha_inc),
                          alpha_inc);
  unsigned int count = 0;
  {
@@ -428,7 +582,9 @@ Search::find_alpha_inc ()
  /* The goal is to choose _alpha_inc[] such that it doesn't introduce
     artificial duplicates.
     In other words, the goal is  # proj2 (proj1 (K)) = # proj1 (K).  */
  _alpha_unify = compute_alpha_unify ();
  unsigned int duplicates_goal = count_duplicates_tuple (_key_positions);
  delete[] _alpha_unify;
  /* Start with zero increments.  This is sufficient in most cases.  */
  unsigned int *current = new unsigned int [_max_key_len];
@@ -545,6 +701,8 @@ Search::find_alpha_inc ()
    }
  _alpha_inc = current;
  _alpha_size = compute_alpha_size (_alpha_inc);
  _alpha_unify = compute_alpha_unify (_key_positions, _alpha_inc);
 }
 /* ======================= Finding good asso_values ======================== */
@@ -555,7 +713,8 @@ Search::prepare ()
  KeywordExt_List *temp;
  /* Initialize each keyword's _selchars array.  */
-  init_selchars_multiset(option[ALLCHARS], _key_positions, _alpha_inc);
+  init_selchars_multiset(option[ALLCHARS], _key_positions,
                         _alpha_unify, _alpha_inc);
  /* Check for duplicates, i.e. keywords with the same _selchars array
     (and - if !option[NOLENGTH] - also the same length).
@@ -634,14 +793,6 @@ Search::prepare ()
        }
    }
  /* Compute _alpha_size, the upper bound on the indices passed to
     asso_values[].  */
  unsigned int max_alpha_inc = 0;
  for (int i = 0; i < _max_key_len; i++)
    if (max_alpha_inc < _alpha_inc[i])
      max_alpha_inc = _alpha_inc[i];
  _alpha_size = (option[SEVENBIT] ? 128 : 256) + max_alpha_inc;
  /* Compute the occurrences of each character in the alphabet.  */
  _occurrences = new int[_alpha_size];
  memset (_occurrences, 0, _alpha_size * sizeof (_occurrences[0]));
@@ -1492,6 +1643,12 @@ Search::optimize ()
  for (unsigned int c = 0; c < _alpha_size; c++)
    if (_occurrences[c] == 0)
      _asso_values[c] = max_hash_value + 1;
  /* Propagate unified asso_values.  */
  if (_alpha_unify)
    for (unsigned int c = 0; c < _alpha_size; c++)
      if (_alpha_unify[c] != c)
        _asso_values[c] = _asso_values[_alpha_unify[c]];
 }
 /* Prints out some diagnostics upon completion.  */
@@ -1533,5 +1690,6 @@ Search::~Search ()
    }
  delete[] _asso_values;
  delete[] _occurrences;
  delete[] _alpha_unify;
  delete[] _alpha_inc;
 }
--- a/src/search.h
+++ b/src/search.h
@@ -41,6 +41,14 @@ public:
 private:
  void                  preprepare ();
  /* Computes the upper bound on the indices passed to asso_values[],
     assuming no alpha_increments.  */
  unsigned int          compute_alpha_size () const;
  /* Computes the unification rules between different asso_values[c],
     assuming no alpha_increments.  */
  unsigned int *        compute_alpha_unify () const;
  /* Initializes each keyword's _selchars array.  */
  void                  init_selchars_tuple (bool use_all_chars, const Positions& positions) const;
  /* Deletes each keyword's _selchars array.  */
@@ -52,8 +60,14 @@ private:
  /* Find good key positions.  */
  void                  find_positions ();
  /* Computes the upper bound on the indices passed to asso_values[].  */
  unsigned int          compute_alpha_size (const unsigned int *alpha_inc) const;
  /* Computes the unification rules between different asso_values[c].  */
  unsigned int *        compute_alpha_unify (const Positions& positions, const unsigned int *alpha_inc) const;
  /* Initializes each keyword's _selchars array.  */
-  void                  init_selchars_multiset (bool use_all_chars, const Positions& positions, const unsigned int *alpha_inc) const;
+  void                  init_selchars_multiset (bool use_all_chars, const Positions& positions, const unsigned int *alpha_unify, const unsigned int *alpha_inc) const;
  /* Count the duplicate keywords that occur with the given set of positions
     and a given alpha_inc[] array.  */
@@ -115,13 +129,17 @@ public:
  /* Adjustments to add to bytes add specific key positions.  */
  unsigned int *        _alpha_inc;
  /* Size of alphabet.  */
  unsigned int          _alpha_size;
  /* Alphabet character unification, either the identity or a mapping from
     upper case characters to lower case characters (and maybe more).  */
  unsigned int *        _alpha_unify;
  /* Total number of duplicates that have been moved to _duplicate_link lists
     (not counting their representatives which stay on the main list).  */
  int                   _total_duplicates;
  /* Size of alphabet.  */
  unsigned int          _alpha_size;
  /* Counts occurrences of each key set character.
     _occurrences[c] is the number of times that c occurs among the _selchars
     of a keyword.  */
--- a/tests/Makefile.in
+++ b/tests/Makefile.in
@@ -143,6 +143,8 @@ check-test:
 	diff $(srcdir)/permut2.exp permut2.out
 	$(GPERF) -m5 < $(srcdir)/permut3.gperf > permut3.out
 	diff $(srcdir)/permut3.exp permut3.out
 	$(GPERF) -m5 --ignore-case < $(srcdir)/permutc2.gperf > permutc2.out
 	diff $(srcdir)/permutc2.exp permutc2.out
 	$(GPERF) -C -E -G -I -t < $(srcdir)/charsets.gperf > charsets.out
 	diff $(srcdir)/charsets.exp charsets.out
 	$(GPERF) -C -E -G -I -t < $(srcdir)/languages.gperf > languages.out
--- a/tests/permutc2.exp
+++ b/tests/permutc2.exp
@@ -0,0 +1,116 @@
 /* C code produced by gperf version 2.7.2 */
 /* Command-line: ../src/gperf -m5 --ignore-case  */
 /* Computed positions: -k'1-2' */
 /* Test of a hash function which has to deal with permutation and
   case-independence.  Without case-independence, the alpha_inc is 1.
   With case-independence, the alpha_inc is 3.  */
 #define TOTAL_KEYWORDS 8
 #define MIN_WORD_LENGTH 2
 #define MAX_WORD_LENGTH 2
 #define MIN_HASH_VALUE 2
 #define MAX_HASH_VALUE 9
 /* maximum key range = 8, duplicates = 0 */
 #ifndef GPERF_CASE_STRCMP
 #define GPERF_CASE_STRCMP 1
 static int
 gperf_case_strcmp (s1, s2)
     register const char *s1;
     register const char *s2;
 {
  for (;;)
    {
      unsigned char c1 = *s1++;
      unsigned char c2 = *s2++;
      if (c1 >= 'A' && c1 <= 'Z')
        c1 += 'a' - 'A';
      if (c2 >= 'A' && c2 <= 'Z')
        c2 += 'a' - 'A';
      if (c1 != 0 && c1 == c2)
        continue;
      return (int)c1 - (int)c2;
    }
 }
 #endif
 #ifdef __GNUC__
 __inline
 #else
 #ifdef __cplusplus
 inline
 #endif
 #endif
 static unsigned int
 hash (str, len)
     register const char *str;
     register unsigned int len;
 {
  static unsigned char asso_values[] =
    {
     10,10,10,10,10,10,10,10,10,10,
     10,10,10,10,10,10,10,10,10,10,
     10,10,10,10,10,10,10,10,10,10,
     10,10,10,10,10,10,10,10,10,10,
     10,10,10,10,10,10,10,10,10,10,
     10,10,10,10,10,10,10,10,10,10,
     10,10,10,10,10, 1,10,10, 3,10,
     10,10,10,10,10,10,10,10,10,10,
     10,10,10,10,10,10,10,10, 3, 1,
      0, 7, 1, 0, 3,10,10, 1,10,10,
      3,10,10,10,10,10,10,10,10,10,
     10,10,10,10,10,10,10,10,10,10,
      3, 1, 0, 0, 1, 0, 2,10,10,10,
     10,10,10,10,10,10,10,10,10,10,
     10,10,10,10,10,10,10,10,10,10,
     10,10,10,10,10,10,10,10,10,10,
     10,10,10,10,10,10,10,10,10,10,
     10,10,10,10,10,10,10,10,10,10,
     10,10,10,10,10,10,10,10,10,10,
     10,10,10,10,10,10,10,10,10,10,
     10,10,10,10,10,10,10,10,10,10,
     10,10,10,10,10,10,10,10,10,10,
     10,10,10,10,10,10,10,10,10,10,
     10,10,10,10,10,10,10,10,10,10,
     10,10,10,10,10,10,10,10,10,10,
     10,10,10,10,10,10,10,10,10
    };
  return len + asso_values[(unsigned char)str[1]+3] + asso_values[(unsigned char)str[0]];
 }
 #ifdef __GNUC__
 __inline
 #endif
 const char *
 in_word_set (str, len)
     register const char *str;
     register unsigned int len;
 {
  static const char * wordlist[] =
    {
      "", "",
      "{w",
      "az",
      "ay",
      "za",
      "ya",
      "x{",
      "x[",
      "[w"
    };
  if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH)
    {
      register int key = hash (str, len);
      if (key <= MAX_HASH_VALUE && key >= 0)
        {
          register const char *s = wordlist[key];
          if (*str == *s && !gperf_case_strcmp (str + 1, s + 1))
            return s;
        }
    }
  return 0;
 }
--- a/tests/permutc2.gperf
+++ b/tests/permutc2.gperf
@@ -0,0 +1,14 @@
 %{
 /* Test of a hash function which has to deal with permutation and
   case-independence.  Without case-independence, the alpha_inc is 1.
   With case-independence, the alpha_inc is 3.  */
 %}
 %%
 az
 za
 ay
 ya
 x{
 x[
 {w
 [w
--- a/tests/test-6.exp
+++ b/tests/test-6.exp
@@ -20,6 +20,9 @@ Input file interpretation:
                         is considered part of the type declaration. Key
                         words and additional fields may follow this, one
                         group of fields per line.
      --ignore-case      Consider upper and lower case ASCII characters as
                         equivalent. Note that locale dependent case mappings
                         are ignored.
 Language for the output code:
  -L, --language=LANGUAGE-NAME