1
0
mirror of https://git.savannah.gnu.org/git/gperf.git synced 2025-12-02 13:09:22 +00:00

Add tests for 8-bit clean comparison and binary comparison.

This commit is contained in:
Bruno Haible
2000-08-20 20:35:53 +00:00
parent 3354d156d6
commit ac65860e4b
8 changed files with 161 additions and 1 deletions

View File

@@ -55,7 +55,7 @@ installdirs :
uninstall :
check : check-link-c check-link-c++ check-c check-ada check-modula3 check-pascal check-test
check : check-link-c check-link-c++ check-c check-ada check-modula3 check-pascal check-lang-utf8 check-lang-ucs2 check-test
@true
extracheck : @CHECK_LANG_SYNTAX@
@@ -101,6 +101,23 @@ check-pascal:
./pout -v < $(srcdir)/pascal.gperf > pascal.out
diff $(srcdir)/pascal.exp pascal.out
# check for 8-bit cleanliness
# Generates lu8inset.c from lang-utf8.gperf (keyed on the struct field named
# by -K foreign_name), links it with test.o into lu8out, and looks up every
# keyword of the input file; the verbose output must match lang-utf8.exp.
# The sed script drops the first 6 lines of lang-utf8.gperf (its declaration
# section, up to and including the separator line) and cuts each remaining
# line at its first comma, leaving only the keyword.
# NOTE(review): test.o is not built by this rule; presumably another check
# target produces it first -- confirm before running this target on its own.
check-lang-utf8:
$(GPERF) -k1 -t -I -K foreign_name < $(srcdir)/lang-utf8.gperf > lu8inset.c
$(CC) $(CFLAGS) -o lu8out lu8inset.c test.o
@echo "testing UTF-8 encoded languages, all items should be found in the set"
sed -e '1,6d' -e 's/,.*//' < $(srcdir)/lang-utf8.gperf | ./lu8out -v > lang-utf8.out
diff $(srcdir)/lang-utf8.exp lang-utf8.out
# check for binary keywords with NUL bytes
# Compiles the 2-bytes-per-character test driver test2.c, generates
# lu2inset.c from lang-ucs2.gperf, links both into lu2out, and looks up the
# keywords read from lang-ucs2.in; the verbose output (keywords printed as
# hex) must match lang-ucs2.exp.
# NOTE(review): unlike check-lang-utf8 this run passes -l (compare keyword
# lengths); this is believed to be what lets keywords containing NUL bytes
# be matched -- see the gperf manual.
check-lang-ucs2:
$(CC) -c $(CFLAGS) $(srcdir)/test2.c
$(GPERF) -k4 -t -l -I -K foreign_name < $(srcdir)/lang-ucs2.gperf > lu2inset.c
$(CC) $(CFLAGS) -o lu2out lu2inset.c test2.o
@echo "testing UCS-2 encoded languages, all items should be found in the set"
./lu2out -v < $(srcdir)/lang-ucs2.in > lang-ucs2.out
diff $(srcdir)/lang-ucs2.exp lang-ucs2.out
# these next 5 are demos that show off the generated code
check-test:
$(GPERF) -L C -F ', 0, 0' -p -j1 -i 1 -g -o -t -G -N is_reserved_word -k1,3,'$$' < $(srcdir)/c-parse.gperf > c-parse.out

20
tests/lang-ucs2.exp Normal file
View File

@@ -0,0 +1,20 @@
in word set 12A0121B122D129B
in word set 010D00650073006B0079
in word set 00440061006E0073006B
in word set 0045006E0067006C006900730068
in word set 00530075006F006D0069
in word set 004600720061006E00E7006100690073
in word set 0044006500750074007300630068
in word set 039503BB03BB03B703BD03B903BA03AC
in word set 05E205D105E805D905EA
in word set 004900740061006C00690061006E006F
in word set 004E006F00720073006B
in word set 0420044304410441043A04380439
in word set 004500730070006100F1006F006C
in word set 005300760065006E0073006B0061
in word set 0E200E320E290E320E440E170E22
in word set 005400FC0072006B00E70065
in word set 005400691EBF006E00670020005600691EC70074
in word set 65E5672C8A9E
in word set 4E2D6587
in word set D55CAE00

26
tests/lang-ucs2.gperf Normal file
View File

@@ -0,0 +1,26 @@
struct language {
const char *foreign_name; /* lookup keyword: the language's own name, written below as 2-byte-per-character hex escapes */
const char *english_name; /* English name of the language */
const char *locale; /* locale name, or NULL where none is listed */
};
%%
"\x12\xA0\x12\x1B\x12\x2D\x12\x9B", "Amharic", NULL
"\x01\x0D\x00\x65\x00\x73\x00\x6B\x00\x79", "Czech", "cs_CZ.UTF-8"
"\x00\x44\x00\x61\x00\x6E\x00\x73\x00\x6B", "Danish", "da_DK.UTF-8"
"\x00\x45\x00\x6E\x00\x67\x00\x6C\x00\x69\x00\x73\x00\x68", "English", "en_GB.UTF-8"
"\x00\x53\x00\x75\x00\x6F\x00\x6D\x00\x69", "Finnish", "fi_FI.UTF-8"
"\x00\x46\x00\x72\x00\x61\x00\x6E\x00\xE7\x00\x61\x00\x69\x00\x73", "French", "fr_FR.UTF-8"
"\x00\x44\x00\x65\x00\x75\x00\x74\x00\x73\x00\x63\x00\x68", "German", "de_DE.UTF-8"
"\x03\x95\x03\xBB\x03\xBB\x03\xB7\x03\xBD\x03\xB9\x03\xBA\x03\xAC", "Greek", "el_GR.UTF-8"
"\x05\xE2\x05\xD1\x05\xE8\x05\xD9\x05\xEA", "Hebrew", "he_IL.UTF-8"
"\x00\x49\x00\x74\x00\x61\x00\x6C\x00\x69\x00\x61\x00\x6E\x00\x6F", "Italian", "it_IT.UTF-8"
"\x00\x4E\x00\x6F\x00\x72\x00\x73\x00\x6B", "Norwegian", "no_NO.UTF-8"
"\x04\x20\x04\x43\x04\x41\x04\x41\x04\x3A\x04\x38\x04\x39", "Russian", "ru_RU.UTF-8"
"\x00\x45\x00\x73\x00\x70\x00\x61\x00\xF1\x00\x6F\x00\x6C", "Spanish", "es_ES.UTF-8"
"\x00\x53\x00\x76\x00\x65\x00\x6E\x00\x73\x00\x6B\x00\x61", "Swedish", "sv_SE.UTF-8"
"\x0E\x20\x0E\x32\x0E\x29\x0E\x32\x0E\x44\x0E\x17\x0E\x22", "Thai", "th_TH.UTF-8"
"\x00\x54\x00\xFC\x00\x72\x00\x6B\x00\xE7\x00\x65", "Turkish", "tr_TR.UTF-8"
"\x00\x54\x00\x69\x1E\xBF\x00\x6E\x00\x67\x00\x20\x00\x56\x00\x69\x1E\xC7\x00\x74", "Vietnamese", "vi_VN.UTF-8"
"\x65\xE5\x67\x2C\x8A\x9E", "Japanese", "ja_JP.UTF-8"
"\x4E\x2D\x65\x87", "Chinese", "zh_CN.UTF-8"
"\xD5\x5C\xAE\x00", "Korean", "ko_KR.UTF-8"

BIN
tests/lang-ucs2.in Normal file

Binary file not shown.

20
tests/lang-utf8.exp Normal file
View File

@@ -0,0 +1,20 @@
in word set አማርኛ
in word set česky
in word set Dansk
in word set English
in word set Suomi
in word set Français
in word set Deutsch
in word set Ελληνικά
in word set עברית
in word set Italiano
in word set Norsk
in word set Русский
in word set Español
in word set Svenska
in word set ภาษาไทย
in word set Türkçe
in word set Tiếng Việt
in word set 日本語
in word set 中文
in word set 한글

26
tests/lang-utf8.gperf Normal file
View File

@@ -0,0 +1,26 @@
struct language {
const char *foreign_name; /* lookup keyword: the language's own name, written below in UTF-8 */
const char *english_name; /* English name of the language */
const char *locale; /* locale name, or NULL where none is listed */
}; /* keep this section at 6 lines including the separator: the check-lang-utf8 rule strips it with sed '1,6d' */
%%
አማርኛ, "Amharic", NULL
česky, "Czech", "cs_CZ.UTF-8"
Dansk, "Danish", "da_DK.UTF-8"
English, "English", "en_GB.UTF-8"
Suomi, "Finnish", "fi_FI.UTF-8"
Français, "French", "fr_FR.UTF-8"
Deutsch, "German", "de_DE.UTF-8"
Ελληνικά, "Greek", "el_GR.UTF-8"
עברית, "Hebrew", "he_IL.UTF-8"
Italiano, "Italian", "it_IT.UTF-8"
Norsk, "Norwegian", "no_NO.UTF-8"
Русский, "Russian", "ru_RU.UTF-8"
Español, "Spanish", "es_ES.UTF-8"
Svenska, "Swedish", "sv_SE.UTF-8"
ภาษาไทย, "Thai", "th_TH.UTF-8"
Türkçe, "Turkish", "tr_TR.UTF-8"
Tiếng Việt, "Vietnamese", "vi_VN.UTF-8"
日本語, "Japanese", "ja_JP.UTF-8"
中文, "Chinese", "zh_CN.UTF-8"
한글, "Korean", "ko_KR.UTF-8"

45
tests/test2.c Normal file
View File

@@ -0,0 +1,45 @@
/*
   Tests the generated perfect hash function on keywords whose characters
   are 2 bytes wide (high byte first).
   The -v option prints diagnostics as to whether a word is in
   the set or not.  Without -v the program is useful for timing.
   (Any command-line argument enables the diagnostics.)
*/

#include <stdio.h>

/* Maximum number of 2-byte characters accepted on one input line.  */
#define MAX_LEN 80

/* Lookup function from the gperf-generated file this program is linked
   with.  Without a declaration the call would implicitly return int, which
   truncates the returned pointer on platforms where pointers are wider
   than int.  It is declared without a prototype so that it stays compatible
   with whichever integer type the generated code uses for the length.
   NOTE(review): the return type assumes gperf was run with -t on an input
   declaring 'struct language' -- confirm against the generated file.  */
struct language;
extern struct language *in_word_set ();

/* Reads lines of 2-byte characters from stdin until end of input or an
   empty line, looks each one up with in_word_set, and prints the line as
   hex digits (prefixed by the lookup verdict when verbose).
   Returns 0 on success, 1 if an input line exceeds MAX_LEN characters.  */
int
main (argc, argv)
     int argc;
     char *argv[];
{
  int verbose = argc > 1 ? 1 : 0;
  char buf[2*MAX_LEN];
  int buflen;

  for (;;)
    {
      /* Simulate gets(buf) with 2 bytes per character.  Each character is
         read into an unsigned scratch pair first, so that the newline test
         never shifts a negative value and so that nothing is stored before
         the bounds check.  */
      unsigned char unit[2];
      char *p = buf;

      while (fread (unit, 2, 1, stdin) == 1)
        {
          if (((unit[0] << 8) | unit[1]) == '\n')
            break;
          /* Refuse over-long lines instead of writing past the end of buf.  */
          if (p == buf + sizeof (buf))
            {
              fprintf (stderr, "input line longer than %d characters\n",
                       MAX_LEN);
              return 1;
            }
          p[0] = (char) unit[0];
          p[1] = (char) unit[1];
          p += 2;
        }

      buflen = p - buf;

      /* End of input (or an empty line) terminates the run.  */
      if (buflen == 0)
        break;

      if (in_word_set (buf, buflen) && verbose)
        printf ("in word set ");
      else if (verbose)
        printf ("NOT in word set ");

      /* The keyword is binary, so echo it as two hex digits per byte.  */
      for (p = buf; p < buf + buflen; p++)
        printf ("%02X", (unsigned char) *p);
      printf("\n");
    }

  return 0;
}