[BACK]Return to texindex.c CVS log [TXT][DIR] Up to [local] / OpenXM / src / asir-doc / jtexindex / C

Annotation of OpenXM/src/asir-doc/jtexindex/C/texindex.c, Revision 1.1.1.1

1.1       noro        1: /* Prepare TeX index dribble output into an actual index.
                      2:
                      3:    Version 1.45
                      4:
                      5:    Copyright (C) 1987, 1991, 1992 Free Software Foundation, Inc.
                      6:
                      7:    This program is free software; you can redistribute it and/or modify
                      8:    it under the terms of the GNU General Public License as published by
                      9:    the Free Software Foundation; either version 2, or (at your option)
                     10:    any later version.
                     11:
                     12:    This program is distributed in the hope that it will be useful,
                     13:    but WITHOUT ANY WARRANTY; without even the implied warranty of
                     14:    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
                     15:    GNU General Public License for more details.
                     16:
                     17:    You should have received a copy of the GNU General Public License
                     18:    along with this program; if not, write to the Free Software
                     19:    Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307. */
                     20:
                     21: /*
                     22:    95.10.18 modified by Nobuyosi KAMEI <CXK03514@niftyserve.or.jp>
                     23:        日本語EUCが処理できるようにした。
                     24:    95.10.19 modified by K. Handa <handa@etl.go.jp>
                     25:        \initial{...} の出力を調整。
                     26: */
                     27: 
                     28:
                     29: #include <stdio.h>
                     30: #include <ctype.h>
                     31: #include <errno.h>
                     32: #include    <assert.h>
                     33: #include "getopt.h"
                     34:
                     35: #define TEXINDEX_VERSION_STRING "GNU Texindex 2.0 for Texinfo release 3.4"
                     36: #define TEXINDEX_MODIFICATION_STRING "(modified for Japanese EUC encoding)"
                     37:
                     38: #if defined (emacs)
                     39: #  include "../src/config.h"
                     40: /* Some s/os.h files redefine these. */
                     41: #  undef read
                     42: #  undef close
                     43: #  undef write
                     44: #  undef open
                     45: #endif
                     46:
                     47: #if defined (HAVE_STRING_H)
                     48: #  include <string.h>
                     49: #endif /* HAVE_STRING_H */
                     50:
                     51: #if !defined (HAVE_STRCHR)
                     52: char *strrchr ();
                     53: #endif /* !HAVE_STRCHR */
                     54:
                     55: #if defined (STDC_HEADERS)
                     56: #  include <stdlib.h>
                     57: #else /* !STDC_HEADERS */
                     58: char *getenv (), *malloc (), *realloc ();
                     59: #endif /* !STDC_HEADERS */
                     60:
                     61: #if defined (HAVE_UNISTD_H)
                     62: #  include <unistd.h>
                     63: #else /* !HAVE_UNISTD_H */
                     64: off_t lseek ();
                     65: #endif /* !HAVE_UNISTD_H */
                     66:
                     67: #if !defined (HAVE_MEMSET)
                     68: #undef memset
                     69: #define memset(ptr, ignore, count) bzero (ptr, count)
                     70: #endif
                     71:
                     72:
                     73: char *mktemp ();
                     74:
                     75: #if defined (VMS)
                     76: #  include <file.h>
                     77: #  define TI_NO_ERROR ((1 << 28) | 1)
                     78: #  define TI_FATAL_ERROR ((1 << 28) | 4)
                     79: #  define unlink delete
                     80: #else /* !VMS */
                     81: #  if defined (HAVE_SYS_FCNTL_H)
                     82: #    include <sys/types.h>
                     83: #    include <sys/fcntl.h>
                     84: #  endif /* HAVE_SYS_FCNTL_H */
                     85:
                     86: #  if defined (_AIX) || !defined (_POSIX_VERSION)
                     87: #    include <sys/file.h>
                     88: #  else /* !AIX && _POSIX_VERSION */
                     89: #    if !defined (HAVE_SYS_FCNTL_H)
                     90: #      include <fcntl.h>
                     91: #    endif /* !HAVE_FCNTL_H */
                     92: #  endif /* !_AIX && _POSIX_VERSION */
                     93: #  define TI_NO_ERROR 0
                     94: #  define TI_FATAL_ERROR 1
                     95: #endif /* !VMS */
                     96:
                     97: #if !defined (SEEK_SET)
                     98: #  define SEEK_SET 0
                     99: #  define SEEK_CUR 1
                    100: #  define SEEK_END 2
                    101: #endif /* !SEEK_SET */
                    102:
                    103: #if !defined (errno)
                    104: extern int errno;
                    105: #endif
                    106: char *strerror ();
                    107:
                    108: /* When sorting in core, this structure describes one line
                    109:    and the position and length of its first keyfield.  */
                    110: struct lineinfo
                    111: {
                    112:   u_short *text;               /* The actual text of the line. */
                    113:   union {
                    114:     u_short *text;     /* The start of the key (for textual comparison). */
                    115:     long number;       /* The numeric value (for numeric comparison). */
                    116:   } key;
                    117:   long keylen;         /* Length of KEY field. */
                    118: };
                    119:
                    120: /* This structure describes a field to use as a sort key. */
                    121: struct keyfield
                    122: {
                    123:   int startwords;      /* Number of words to skip. */
                    124:   int startchars;      /* Number of additional chars to skip. */
                    125:   int endwords;                /* Number of words to ignore at end. */
                    126:   int endchars;                /* Ditto for characters of last word. */
                    127:   char ignore_blanks;  /* Non-zero means ignore spaces and tabs. */
                    128:   char fold_case;      /* Non-zero means case doesn't matter. */
                    129:   char reverse;                /* Non-zero means compare in reverse order. */
                    130:   char numeric;                /* Non-zeros means field is ASCII numeric. */
                    131:   char positional;     /* Sort according to file position. */
                    132:   char braced;         /* Count balanced-braced groupings as fields. */
                    133: };
                    134:
                    135: /* Vector of keyfields to use. */
                    136: struct keyfield keyfields[3];
                    137:
                    138: /* Number of keyfields stored in that vector.  */
                    139: int num_keyfields = 3;
                    140:
                    141: /* Vector of input file names, terminated with a null pointer. */
                    142: char **infiles;
                    143:
                    144: /* Vector of corresponding output file names, or NULL, meaning default it
                    145:    (add an `s' to the end). */
                    146: char **outfiles;
                    147:
                    148: /* Length of `infiles'. */
                    149: int num_infiles;
                    150:
                    151: /* Pointer to the array of pointers to lines being sorted. */
                    152: u_short **linearray;
                    153:
                    154: /* The allocated length of `linearray'. */
                    155: long nlines;
                    156:
                    157: /* Directory to use for temporary files.  On Unix, it ends with a slash.  */
                    158: char *tempdir;
                    159:
                    160: /* Start of filename to use for temporary files.  */
                    161: char *tempbase;
                    162:
                    163: /* Number of last temporary file.  */
                    164: int tempcount;
                    165:
                    166: /* Number of last temporary file already deleted.
                    167:    Temporary files are deleted by `flush_tempfiles' in order of creation.  */
                    168: int last_deleted_tempcount;
                    169:
                    170: /* During in-core sort, this points to the base of the data block
                    171:    which contains all the lines of data.  */
                    172: u_short *text_base;
                    173:
                    174: /* Additional command switches .*/
                    175:
                    176: /* Nonzero means do not delete tempfiles -- for debugging. */
                    177: int keep_tempfiles;
                    178:
                    179: /* The name this program was run with. */
                    180: char *program_name;
                    181:
                    182: /* 仮名文字の母音テーブル */
                    183: int Vowel_Table[0x60];
                    184:
                    185: /* Forward declarations of functions in this file. */
                    186:
                    187: void decode_command ();
                    188: void sort_in_core ();
                    189: void sort_offline ();
                    190: u_short **parsefile ();
                    191: u_short *find_field ();
                    192: u_short *find_pos ();
                    193: long find_value ();
                    194: u_short *find_braced_pos (u_short *str, int words, int chars, int ignore_blanks);
                    195: u_short *find_braced_end ();
                    196: void writelines (u_short **linearray, int nlines, FILE *ostream);
                    197: int compare_field ();
                    198: int compare_full ();
                    199: long readline ();
                    200: int merge_files ();
                    201: int merge_direct ();
                    202: void pfatal_with_name ();
                    203: void fatal ();
                    204: void error ();
                    205: void *xmalloc (), *xrealloc ();
                    206: char *concat ();
                    207: char *maketempname ();
                    208: void flush_tempfiles ();
                    209: char *tempcopy ();
                    210: static void    memory_error();
                    211: static int wstrncmp(const u_short *s1, const u_short *s2, size_t n);
                    212: static int wfwrite(u_short *ptr, size_t  size,  size_t  nmemb,  FILE*stream);
                    213: static int wtoc(char *cptr, u_short const *ptr, int len);
                    214: static int  Vowel_of(int c);
                    215: static void init_Vowel_Table(void);
                    216:
                    217: #define        EUC_BYTE(c) (0x00A1 <= c && c <= 0x00FE)
                    218: #define G1_CHAR(c) (c & 0xFF00)
                    219: 
                    220: #define MAX_IN_CORE_SORT 500000
                    221:
                    222: void
                    223: main (argc, argv)
                    224:      int argc;
                    225:      char **argv;
                    226: {
                    227:   int i;
                    228:
                    229:   tempcount = 0;
                    230:   last_deleted_tempcount = 0;
                    231:
                    232:   program_name = strrchr (argv[0], '/');
                    233:   if (program_name != (char *)NULL)
                    234:     program_name++;
                    235:   else
                    236:     program_name = argv[0];
                    237:
                    238:   /* Describe the kind of sorting to do. */
                    239:   /* The first keyfield uses the first braced field and folds case. */
                    240:   keyfields[0].braced = 1;
                    241:   keyfields[0].fold_case = 1;
                    242:   keyfields[0].endwords = -1;
                    243:   keyfields[0].endchars = -1;
                    244:
                    245:   /* The second keyfield uses the second braced field, numerically. */
                    246:   keyfields[1].braced = 1;
                    247:   keyfields[1].numeric = 1;
                    248:   keyfields[1].startwords = 1;
                    249:   keyfields[1].endwords = -1;
                    250:   keyfields[1].endchars = -1;
                    251:
                    252:   /* The third keyfield (which is ignored while discarding duplicates)
                    253:      compares the whole line. */
                    254:   keyfields[2].endwords = -1;
                    255:   keyfields[2].endchars = -1;
                    256:
                    257:   decode_command (argc, argv);
                    258:
                    259:   tempbase = mktemp (concat ("txiXXXXXX", "", ""));
                    260:
                    261:   /* Process input files completely, one by one.  */
                    262:
                    263:   for (i = 0; i < num_infiles; i++)
                    264:     {
                    265:       int desc;
                    266:       long ptr;
                    267:       char *outfile;
                    268:
                    269:       desc = open (infiles[i], O_RDONLY, 0);
                    270:       if (desc < 0)
                    271:        pfatal_with_name (infiles[i]);
                    272:       lseek (desc, (off_t) 0, SEEK_END);
                    273:       ptr = (long) lseek (desc, (off_t) 0, SEEK_CUR);
                    274:
                    275:       close (desc);
                    276:
                    277:       outfile = outfiles[i];
                    278:       if (!outfile)
                    279:        {
                    280:          outfile = concat (infiles[i], "s", "");
                    281:        }
                    282:
                    283:       if (ptr < MAX_IN_CORE_SORT)
                    284:        /* Sort a small amount of data. */
                    285:        sort_in_core (infiles[i], ptr, outfile);
                    286:       else
                    287:        sort_offline (infiles[i], ptr, outfile);
                    288:     }
                    289:
                    290:   flush_tempfiles (tempcount);
                    291:   exit (TI_NO_ERROR);
                    292: }
                    293: 
                    294: typedef struct
                    295: {
                    296:   char *long_name;
                    297:   char *short_name;
                    298:   int *variable_ref;
                    299:   int variable_value;
                    300:   char *arg_name;
                    301:   char *doc_string;
                    302: } TEXINDEX_OPTION;
                    303:
                    304: TEXINDEX_OPTION texindex_options[] = {
                    305:   { "--keep", "-k", &keep_tempfiles, 1, (char *)NULL,
                    306:       "Keep temporary files around after processing" },
                    307:   { "--no-keep", 0, &keep_tempfiles, 0, (char *)NULL,
                    308:       "Do not keep temporary files around after processing (default)" },
                    309:   { "--output", "-o", (int *)NULL, 0, "FILE",
                    310:       "Send output to FILE" },
                    311:   { "--version", (char *)NULL, (int *)NULL, 0, (char *)NULL,
                    312:       "Show version information" },
                    313:   { "--help", "-h", (int *)NULL, 0, (char *)NULL, "Produce this listing" },
                    314:   { (char *)NULL, (char *)NULL, (int *)NULL, 0, (char *)NULL }
                    315: };
                    316:
                    317: void
                    318: usage (result_value)
                    319:      int result_value;
                    320: {
                    321:   register int i;
                    322:
                    323:   fprintf (stderr, "Usage: %s [OPTIONS] FILE...\n", program_name);
                    324:   fprintf (stderr, "  Generate a permuted index for the TeX files given.\n");
                    325:   fprintf (stderr, "  Usually FILE... is `foo.??' for the source file `foo.tex'.\n");
                    326:   fprintf (stderr, "  The OPTIONS are:\n");
                    327:
                    328:   for (i = 0; texindex_options[i].long_name; i++)
                    329:     {
                    330:       fprintf (stderr, "    %s %s",
                    331:               texindex_options[i].long_name,
                    332:               texindex_options[i].arg_name ?
                    333:               texindex_options[i].arg_name : "");
                    334:
                    335:       if (texindex_options[i].short_name)
                    336:        fprintf (stderr, " \n    or %s %s",
                    337:                 texindex_options[i].short_name,
                    338:                 texindex_options[i].arg_name ?
                    339:                 texindex_options[i].arg_name : "");
                    340:       fprintf (stderr, "\t%s\n", texindex_options[i].doc_string);
                    341:     }
                    342:   exit (result_value);
                    343: }
                    344:
                    345: /* Decode the command line arguments to set the parameter variables
                    346:    and set up the vector of keyfields and the vector of input files. */
                    347:
                    348: void
                    349: decode_command (argc, argv)
                    350:      int argc;
                    351:      char **argv;
                    352: {
                    353:   int arg_index = 1;
                    354:   char **ip;
                    355:   char **op;
                    356:
                    357:   /* Store default values into parameter variables. */
                    358:
                    359:   tempdir = getenv ("TMPDIR");
                    360: #ifdef VMS
                    361:   if (tempdir == NULL)
                    362:     tempdir = "sys$scratch:";
                    363: #else
                    364:   if (tempdir == NULL)
                    365:     tempdir = "/tmp/";
                    366:   else
                    367:     tempdir = concat (tempdir, "/", "");
                    368: #endif
                    369:
                    370:   keep_tempfiles = 0;
                    371:
                    372:   /* Allocate ARGC input files, which must be enough.  */
                    373:
                    374:   infiles = (char **) xmalloc (argc * sizeof (char *));
                    375:   outfiles = (char **) xmalloc (argc * sizeof (char *));
                    376:   ip = infiles;
                    377:   op = outfiles;
                    378:
                    379:   while (arg_index < argc)
                    380:     {
                    381:       char *arg = argv[arg_index++];
                    382:
                    383:       if (*arg == '-')
                    384:        {
                    385:          if (strcmp (arg, "--version") == 0)
                    386:            {
                    387:              fprintf (stderr, "%s", TEXINDEX_VERSION_STRING);
                    388: #ifdef TEXINDEX_MODIFICATION_STRING
                    389:              fprintf (stderr, " %s", TEXINDEX_MODIFICATION_STRING);
                    390: #endif
                    391:              fprintf (stderr, "\n");
                    392:              exit (0);
                    393:            }
                    394:          else if ((strcmp (arg, "--keep") == 0) ||
                    395:                   (strcmp (arg, "-k") == 0))
                    396:            {
                    397:              keep_tempfiles = 1;
                    398:            }
                    399:          else if ((strcmp (arg, "--help") == 0) ||
                    400:                   (strcmp (arg, "-h") == 0))
                    401:            {
                    402:              usage (0);
                    403:            }
                    404:          else if ((strcmp (arg, "--output") == 0) ||
                    405:                   (strcmp (arg, "-o") == 0))
                    406:            {
                    407:              if (argv[arg_index] != (char *)NULL)
                    408:                {
                    409:                  arg_index++;
                    410:                  if (op > outfiles)
                    411:                    *(op - 1) = argv[arg_index];
                    412:                }
                    413:              else
                    414:                usage (1);
                    415:            }
                    416:          else
                    417:            usage (1);
                    418:        }
                    419:       else
                    420:        {
                    421:          *ip++ = arg;
                    422:          *op++ = (char *)NULL;
                    423:        }
                    424:     }
                    425:
                    426:   /* Record number of keyfields and terminate list of filenames. */
                    427:   num_infiles = ip - infiles;
                    428:   *ip = (char *)NULL;
                    429:   if (num_infiles == 0)
                    430:     usage (1);
                    431: }
                    432: 
                    433: /* Return a name for a temporary file. */
                    434:
                    435: char *
                    436: maketempname (count)
                    437:      int count;
                    438: {
                    439:   char tempsuffix[10];
                    440:   sprintf (tempsuffix, "%d", count);
                    441:   return concat (tempdir, tempbase, tempsuffix);
                    442: }
                    443:
                    444: /* Delete all temporary files up to TO_COUNT. */
                    445:
                    446: void
                    447: flush_tempfiles (to_count)
                    448:      int to_count;
                    449: {
                    450:   if (keep_tempfiles)
                    451:     return;
                    452:   while (last_deleted_tempcount < to_count)
                    453:     unlink (maketempname (++last_deleted_tempcount));
                    454: }
                    455:
                    456: /* Copy the input file open on IDESC into a temporary file
                    457:    and return the temporary file name. */
                    458:
                    459: #define BUFSIZE 1024
                    460:
                    461: char *
                    462: tempcopy (idesc)
                    463:      int idesc;
                    464: {
                    465:   char *outfile = maketempname (++tempcount);
                    466:   int odesc;
                    467:   char buffer[BUFSIZE];
                    468:
                    469:   odesc = open (outfile, O_WRONLY | O_CREAT, 0666);
                    470:
                    471:   if (odesc < 0)
                    472:     pfatal_with_name (outfile);
                    473:
                    474:   while (1)
                    475:     {
                    476:       int nread = read (idesc, buffer, BUFSIZE);
                    477:       write (odesc, buffer, nread);
                    478:       if (!nread)
                    479:        break;
                    480:     }
                    481:
                    482:   close (odesc);
                    483:
                    484:   return outfile;
                    485: }
                    486: 
                    487: /* Compare LINE1 and LINE2 according to the specified set of keyfields. */
                    488:
                    489: int
                    490: compare_full (line1, line2)
                    491:      u_short **line1, **line2;
                    492: {
                    493:   int i;
                    494:
                    495:   /* Compare using the first keyfield;
                    496:      if that does not distinguish the lines, try the second keyfield;
                    497:      and so on. */
                    498:
                    499:   for (i = 0; i < num_keyfields; i++)
                    500:     {
                    501:       long length1, length2;
                    502:       u_short *start1 = find_field (&keyfields[i], *line1, &length1);
                    503:       u_short *start2 = find_field (&keyfields[i], *line2, &length2);
                    504:       int tem = compare_field (&keyfields[i], start1, length1, *line1 - text_base,
                    505:                               start2, length2, *line2 - text_base);
                    506:       if (tem)
                    507:        {
                    508:          if (keyfields[i].reverse)
                    509:            return -tem;
                    510:          return tem;
                    511:        }
                    512:     }
                    513:
                    514:   return 0;                    /* Lines match exactly. */
                    515: }
                    516:
                    517: /* Compare LINE1 and LINE2, described by structures
                    518:    in which the first keyfield is identified in advance.
                    519:    For positional sorting, assumes that the order of the lines in core
                    520:    reflects their nominal order.  */
                    521:
                    522: int
                    523: compare_prepared (line1, line2)
                    524:      struct lineinfo *line1, *line2;
                    525: {
                    526:   int i;
                    527:   int tem;
                    528:   u_short *text1, *text2;
                    529:
                    530:   /* Compare using the first keyfield, which has been found for us already. */
                    531:   if (keyfields->positional)
                    532:     {
                    533:       if (line1->text - text_base > line2->text - text_base)
                    534:        tem = 1;
                    535:       else
                    536:        tem = -1;
                    537:     }
                    538:   else if (keyfields->numeric)
                    539:     tem = line1->key.number - line2->key.number;
                    540:   else
                    541:     tem = compare_field (keyfields, line1->key.text, line1->keylen, 0,
                    542:                         line2->key.text, line2->keylen, 0);
                    543:   if (tem)
                    544:     {
                    545:       if (keyfields->reverse)
                    546:        return -tem;
                    547:       return tem;
                    548:     }
                    549:
                    550:   text1 = line1->text;
                    551:   text2 = line2->text;
                    552:
                    553:   /* Compare using the second keyfield;
                    554:      if that does not distinguish the lines, try the third keyfield;
                    555:      and so on. */
                    556:
                    557:   for (i = 1; i < num_keyfields; i++)
                    558:     {
                    559:       long length1, length2;
                    560:       u_short *start1 = find_field (&keyfields[i], text1, &length1);
                    561:       u_short *start2 = find_field (&keyfields[i], text2, &length2);
                    562:       int tem = compare_field (&keyfields[i], start1, length1, text1 - text_base,
                    563:                               start2, length2, text2 - text_base);
                    564:       if (tem)
                    565:        {
                    566:          if (keyfields[i].reverse)
                    567:            return -tem;
                    568:          return tem;
                    569:        }
                    570:     }
                    571:
                    572:   return 0;                    /* Lines match exactly. */
                    573: }
                    574:
                    575: /* Like compare_full but more general.
                    576:    You can pass any strings, and you can say how many keyfields to use.
                    577:    POS1 and POS2 should indicate the nominal positional ordering of
                    578:    the two lines in the input.  */
                    579:
                    580: int
                    581: compare_general (str1, str2, pos1, pos2, use_keyfields)
                    582:      u_short *str1, *str2;
                    583:      long pos1, pos2;
                    584:      int use_keyfields;
                    585: {
                    586:   int i;
                    587:
                    588:   /* Compare using the first keyfield;
                    589:      if that does not distinguish the lines, try the second keyfield;
                    590:      and so on. */
                    591:
                    592:   for (i = 0; i < use_keyfields; i++)
                    593:     {
                    594:       long length1, length2;
                    595:       u_short *start1 = find_field (&keyfields[i], str1, &length1);
                    596:       u_short *start2 = find_field (&keyfields[i], str2, &length2);
                    597:       int tem = compare_field (&keyfields[i], start1, length1, pos1,
                    598:                               start2, length2, pos2);
                    599:       if (tem)
                    600:        {
                    601:          if (keyfields[i].reverse)
                    602:            return -tem;
                    603:          return tem;
                    604:        }
                    605:     }
                    606:
                    607:   return 0;                    /* Lines match exactly. */
                    608: }
                    609:
                    610: /* Find the start and length of a field in STR according to KEYFIELD.
                    611:    A pointer to the starting character is returned, and the length
                    612:    is stored into the int that LENGTHPTR points to.  */
                    613:
                    614: u_short *
                    615: find_field (keyfield, str, lengthptr)
                    616:      struct keyfield *keyfield;
                    617:      u_short *str;
                    618:      long *lengthptr;
                    619: {
                    620:   u_short *start;
                    621:   u_short *end;
                    622:   u_short *(*fun) ();
                    623:
                    624:   if (keyfield->braced)
                    625:     fun = find_braced_pos;
                    626:   else
                    627:     fun = find_pos;
                    628:
                    629:   start = (*fun) (str, keyfield->startwords, keyfield->startchars,
                    630:                  keyfield->ignore_blanks);
                    631:   if (keyfield->endwords < 0)
                    632:     {
                    633:       if (keyfield->braced)
                    634:        end = find_braced_end (start);
                    635:       else
                    636:        {
                    637:          end = start;
                    638:          while (*end && *end != '\n')
                    639:            end++;
                    640:        }
                    641:     }
                    642:   else
                    643:     {
                    644:       end = (*fun) (str, keyfield->endwords, keyfield->endchars, 0);
                    645:       if (end - str < start - str)
                    646:        end = start;
                    647:     }
                    648:   *lengthptr = end - start;
                    649:   return start;
                    650: }
                    651:
                    652: /* Return a pointer to a specified place within STR,
                    653:    skipping (from the beginning) WORDS words and then CHARS chars.
                    654:    If IGNORE_BLANKS is nonzero, we skip all blanks
                    655:    after finding the specified word.  */
                    656:
                    657: u_short *
                    658: find_pos (str, words, chars, ignore_blanks)
                    659:      u_short *str;
                    660:      int words, chars;
                    661:      int ignore_blanks;
                    662: {
                    663:   int i;
                    664:   u_short *p = str;
                    665:
                    666:   for (i = 0; i < words; i++)
                    667:     {
                    668:       u_short c;
                    669:       /* Find next bunch of nonblanks and skip them. */
                    670:       while ((c = *p) == ' ' || c == '\t')
                    671:        p++;
                    672:       while ((c = *p) && c != '\n' && !(c == ' ' || c == '\t'))
                    673:        p++;
                    674:       if (!*p || *p == '\n')
                    675:        return p;
                    676:     }
                    677:
                    678:   while (*p == ' ' || *p == '\t')
                    679:     p++;
                    680:
                    681:   for (i = 0; i < chars; i++)
                    682:     {
                    683:       if (!*p || *p == '\n')
                    684:        break;
                    685:       p++;
                    686:     }
                    687:   return p;
                    688: }
                    689:
                    690: /* Like find_pos but assumes that each field is surrounded by braces
                    691:    and that braces within fields are balanced. */
                    692:
                    693: u_short *
                    694: find_braced_pos (str, words, chars, ignore_blanks)
                    695:      u_short *str;
                    696:      int words, chars;
                    697:      int ignore_blanks;
                    698: {
                    699:   int i;
                    700:   int bracelevel;
                    701:   u_short *p = str;
                    702:   u_short c;
                    703:
                    704:   for (i = 0; i < words; i++)
                    705:     {
                    706:       bracelevel = 1;
                    707:       while ((c = *p++) != '{' && c != '\n' && c)
                    708:        /* Do nothing. */ ;
                    709:       if (c != '{')
                    710:        return p - 1;
                    711:       while (bracelevel)
                    712:        {
                    713:          c = *p++;
                    714:          if (c == '{')
                    715:            bracelevel++;
                    716:          if (c == '}')
                    717:            bracelevel--;
                    718:          if (c == 0 || c == '\n')
                    719:            return p - 1;
                    720:        }
                    721:     }
                    722:
                    723:   while ((c = *p++) != '{' && c != '\n' && c)
                    724:     /* Do nothing. */ ;
                    725:
                    726:   if (c != '{')
                    727:     return p - 1;
                    728:
                    729:   if (ignore_blanks)
                    730:     while ((c = *p) == ' ' || c == '\t')
                    731:       p++;
                    732:
                    733:   for (i = 0; i < chars; i++)
                    734:     {
                    735:       if (!*p || *p == '\n')
                    736:        break;
                    737:       p++;
                    738:     }
                    739:   return p;
                    740: }
                    741:
                    742: /* Find the end of the balanced-brace field which starts at STR.
                    743:    The position returned is just before the closing brace. */
                    744:
                    745: u_short *
                    746: find_braced_end (str)
                    747:      u_short *str;
                    748: {
                    749:   int bracelevel;
                    750:   u_short *p = str;
                    751:   u_short c;
                    752:
                    753:   bracelevel = 1;
                    754:   while (bracelevel)
                    755:     {
                    756:       c = *p++;
                    757:       if (c == '{')
                    758:        bracelevel++;
                    759:       if (c == '}')
                    760:        bracelevel--;
                    761:       if (c == 0 || c == '\n')
                    762:        return p - 1;
                    763:     }
                    764:   return p - 1;
                    765: }
                    766:
                    767: long
                    768: find_value (start, length)
                    769:     u_short *start;
                    770:     long length;
                    771:     {
                    772:     long       val;
                    773:
                    774:     while (length != 0L)
                    775:        {
                    776:        if (isdigit (*start))
                    777:            break;
                    778:        length--;
                    779:        start++;
                    780:        }
                    781:
                    782:     val = 0;
                    783:     while(isdigit(*start) && length > 0L)
                    784:        {
                    785:        val = val*10 + *start;
                    786:        start++;
                    787:        length--;
                    788:        }
                    789:
                    790:     return val;
                    791:     }
                    792:
                    793: /* Vector used to translate characters for comparison.
                    794:    This is how we make all alphanumerics follow all else,
                    795:    and ignore case in the first sorting.  */
                    796: int char_order[0x8000];
                    797:
                    798: void
                    799: init_char_order (void)
                    800:     {
                    801:     int i;
                    802:
                    803:     for (i = 1; i < 0x8000; i++)
                    804:        char_order[i] = i;
                    805:
                    806:     for (i = '0'; i <= '9'; i++)
                    807:        char_order[i] += 512;
                    808:
                    809:     for (i = 'a'; i <= 'z'; i++)
                    810:        {
                    811:        char_order[i] = 512 + i;
                    812:        char_order[i + 'A' - 'a'] = 512 + i;
                    813:        }
                    814:
                    815:     /* 以下のシンボルは無視 (i.e. char_order[XXX] = 0) */
                    816:     for (i = 0x2121; i <= 0x2132; i++) /* 全角空白 〜 _ */
                    817:         char_order[i] = 0;
                    818:     for (i = 0x213D; i <= 0x215B; i++) /* − 〜 】 */
                    819:         char_order[i] = 0;
                    820:
                    821:     /* 促音、拗音、濁音、半濁音、片仮名/平仮名の処理   */
                    822:     /*     1.  平仮名と片仮名、同じに扱う  */
                    823:     /*         ぁ/ァ(2421/2521)〜ん/ン(2473/2573)  */
                    824:     for (i = 0x2421; i < 0x2474; i++)
                    825:        char_order[i + 0x100] = i;
                    826:
                    827:     /*     2.  促音、拗音、濁音、半濁音、類    */
                    828:     /* ぁ〜ぉ →       あ〜お  */
                    829:     for (i = 0x2421; i < 0x242B; i += 2)
                    830:        char_order[i] = char_order[i + 0x100]    = i+1;
                    831:     /* が〜ぢ →       か〜ち  */
                    832:     for (i = 0x242C; i < 0x2443; i += 2)
                    833:        char_order[i] = char_order[i + 0x100]    = i-1;
                    834:     /* っ */
                    835:     char_order[0x2443] = char_order[0x2543]    = 0x2443+1;
                    836:     /* づ〜ど →       つ〜と */
                    837:     for (i = 0x2445; i < 0x244A; i += 2)
                    838:        char_order[i] = char_order[i + 0x100]    = i-1;
                    839:     /* ばぱ〜ぼぽ      →  は〜ほ  */
                    840:     for (i = 0x2450; i < 0x245E; i += 3)
                    841:        {
                    842:        char_order[i]   = char_order[i + 0x100]     = i-1;
                    843:        char_order[i+1] = char_order[i+1 + 0x100]   = i-1;
                    844:        }
                    845:     /* ゃ〜ょ →       や〜よ  */
                    846:     for (i = 0x2463; i < 0x2469; i += 2)
                    847:        char_order[i] = char_order[i + 0x100] = i+1;
                    848:     /* ゎ      →  わ  */
                    849:     char_order[0x246E] = char_order[0x256E]    = 0x246E +1;
                    850:
                    851:     /*     3.  特殊片仮名の扱い    */
                    852:     char_order[0x2574] = 0x2426;       /* ヴ   →  う */
                    853:     char_order[0x2575] = 0x242B;       /* ヵ   →  か */
                    854:     char_order[0x2576] = 0x2431;       /* ヶ   →  け */
                    855:
                    856:     /* JISX0208のアルファベット、数字  */
                    857:     for (i = 0x2330; i <= 0x2339; i++) /* 0〜9 -> 0-9 */
                    858:         char_order[i] = i - 0x2330 + 512;
                    859:     for (i = 0x2341; i <= 0x235A; i++) /* A〜Z、a〜z -> A-Z */
                    860:         char_order[i] = char_order[i + (0x2361 - 0x2341)]
                    861:          = 512 + (i -  0x2341) + 'a';
                    862:
                    863:     /* 長音処理は比較ルーチンで行なう */
                    864:
                    865:     init_Vowel_Table();
                    866:     }
                    867:
                    868: /* Compare two fields (each specified as a start pointer and a character count)
                    869:    according to KEYFIELD.
                    870:    The sign of the value reports the relation between the fields. */
                    871:
                    872: int
                    873: compare_field (keyfield, start1, length1, pos1, start2, length2, pos2)
                    874:      struct keyfield *keyfield;
                    875:      u_short *start1;
                    876:      long length1;
                    877:      long pos1;
                    878:      u_short *start2;
                    879:      long length2;
                    880:      long pos2;
                    881: {
                    882:   if (keyfields->positional)
                    883:     {
                    884:       if (pos1 > pos2)
                    885:        return 1;
                    886:       else
                    887:        return -1;
                    888:     }
                    889:   if (keyfield->numeric)
                    890:     {
                    891:       long value = find_value (start1, length1) - find_value (start2, length2);
                    892:       if (value > 0)
                    893:        return 1;
                    894:       if (value < 0)
                    895:        return -1;
                    896:       return 0;
                    897:     }
                    898:   else
                    899:     {
                    900:     /* lexical ってか? */
                    901:       u_short *p1 = start1;
                    902:       u_short *p2 = start2;
                    903:       u_short *e1 = start1 + length1;
                    904:       u_short *e2 = start2 + length2;
                    905:       u_short  lastchar1 = 0, lastchar2 = 0;
                    906:       int   c1, c2;
                    907:
                    908:       do
                    909:          {
                    910:          int   co1, co2;
                    911:
                    912:          /* char_order[X] が 0 のもの(シンボル)は無視する */
                    913:          c1 = c2 = 0;
                    914:          while (p1 < e1)
                    915:            {
                    916:              if (char_order[c1 = *p1++]) break;
                    917:              c1 = 0;
                    918:            }
                    919:          while (p2 < e2)
                    920:            {
                    921:              if (char_order[c2 = *p2++]) break;
                    922:              c2 = 0;
                    923:            }
                    924:
                    925:          /* 長音処理 */
                    926:          if (c1 == 0x213C)                     /* ー */
                    927:              c1    = Vowel_of(lastchar1);
                    928:          if (c2 == 0x213C)                     /* ー */
                    929:              c2    = Vowel_of(lastchar2);
                    930:          co1   = char_order[c1];
                    931:          co2   = char_order[c2];
                    932:
                    933:          if (co1 != co2)
                    934:              return co1 - co2;
                    935:          else
                    936:              lastchar1 = c1,   lastchar2 = c2;
                    937:
                    938:          } while(c1);
                    939:
                    940:       /* Strings are equal except possibly for case.  */
                    941:       p1 = start1;
                    942:       p2 = start2;
                    943:       while (1)
                    944:        {
                    945:          int c1, c2;
                    946:
                    947:          if (p1 == e1)
                    948:            c1 = 0;
                    949:          else
                    950:            c1 = *p1++;
                    951:          if (p2 == e2)
                    952:            c2 = 0;
                    953:          else
                    954:            c2 = *p2++;
                    955:
                    956:          if (c1 != c2)
                    957:            /* Reverse sign here so upper case comes out last.  */
                    958:            return c2 - c1;
                    959:          if (!c1)
                    960:            break;
                    961:        }
                    962:
                    963:       return 0;
                    964:     }
                    965: }
                    966: 
                    967: /* A `struct linebuffer' is a structure which holds a line of text.
                    968:    `readline' reads a line from a stream into a linebuffer
                    969:    and works regardless of the length of the line.  */
                    970:
                    971: struct linebuffer
                    972: {
                    973:   long size;
                    974:   u_short *buffer;
                    975: };
                    976:
                    977: /* Initialize LINEBUFFER for use. */
                    978:
                    979: void
                    980: initbuffer (linebuffer)
                    981:      struct linebuffer *linebuffer;
                    982: {
                    983:   linebuffer->size = 200;
                    984:   linebuffer->buffer = (u_short *) xmalloc (200*sizeof(u_short));
                    985: }
                    986:
                    987: /* Read a line of text from STREAM into LINEBUFFER.
                    988:    Return the length of the line.  */
                    989:
                    990: long
                    991: readline (linebuffer, stream)
                    992:      struct linebuffer *linebuffer;
                    993:      FILE *stream;
                    994: {
                    995:   u_short *buffer = linebuffer->buffer;
                    996:   u_short *p = linebuffer->buffer;
                    997:   u_short *end = p + linebuffer->size;
                    998:
                    999:   while (1)
                   1000:     {
                   1001:       int c = getc (stream);
                   1002:       if (p == end)
                   1003:        {
                   1004:          buffer =
                   1005:              (u_short *) xrealloc (buffer,
                   1006:                                    (linebuffer->size *= 2)*sizeof(u_short));
                   1007:          p += buffer - linebuffer->buffer;
                   1008:          end += buffer - linebuffer->buffer;
                   1009:          linebuffer->buffer = buffer;
                   1010:        }
                   1011:       if (c < 0 || c == '\n')
                   1012:        {
                   1013:          *p = 0;
                   1014:          break;
                   1015:        }
                   1016:       *p++ = c;
                   1017:     }
                   1018:
                   1019:   return p - buffer;
                   1020: }
                   1021: 
                   1022: /* Sort an input file too big to sort in core.  */
                   1023:
                   1024: void
                   1025: sort_offline (infile, nfiles, total, outfile)
                   1026:      char *infile;
                   1027:      int nfiles;
                   1028:      long total;
                   1029:      char *outfile;
                   1030: {
                   1031:   /* More than enough. */
                   1032:   int ntemps = 2 * (total + MAX_IN_CORE_SORT - 1) / MAX_IN_CORE_SORT;
                   1033:   char **tempfiles = (char **) xmalloc (ntemps * sizeof (char *));
                   1034:   FILE *istream = fopen (infile, "r");
                   1035:   int i;
                   1036:   struct linebuffer lb;
                   1037:   long linelength;
                   1038:   int failure = 0;
                   1039:
                   1040:   initbuffer (&lb);
                   1041:
                   1042:   /* Read in one line of input data.  */
                   1043:
                   1044:   linelength = readline (&lb, istream);
                   1045:
                   1046:   if (lb.buffer[0] != '\\' && lb.buffer[0] != '@')
                   1047:     {
                   1048:       error ("%s: not a texinfo index file", infile);
                   1049:       return;
                   1050:     }
                   1051:
                   1052:   /* Split up the input into `ntemps' temporary files, or maybe fewer,
                   1053:      and put the new files' names into `tempfiles' */
                   1054:
                   1055:   for (i = 0; i < ntemps; i++)
                   1056:     {
                   1057:       char *outname = maketempname (++tempcount);
                   1058:       FILE *ostream = fopen (outname, "w");
                   1059:       long tempsize = 0;
                   1060:
                   1061:       if (!ostream)
                   1062:        pfatal_with_name (outname);
                   1063:       tempfiles[i] = outname;
                   1064:
                   1065:       /* Copy lines into this temp file as long as it does not make file
                   1066:         "too big" or until there are no more lines.  */
                   1067:
                   1068:       while (tempsize + linelength + 1 <= MAX_IN_CORE_SORT)
                   1069:        {
                   1070:          tempsize += linelength + 1;
                   1071:              {
                   1072:              u_short   *tp;
                   1073:
                   1074:              for (tp = lb.buffer; *tp; tp++)
                   1075:                  putc(*tp, ostream);
                   1076:              }
                   1077:          putc ('\n', ostream);
                   1078:
                   1079:          /* Read another line of input data.  */
                   1080:
                   1081:          linelength = readline (&lb, istream);
                   1082:          if (!linelength && feof (istream))
                   1083:            break;
                   1084:
                   1085:          if (lb.buffer[0] != '\\' && lb.buffer[0] != '@')
                   1086:            {
                   1087:              error ("%s: not a texinfo index file", infile);
                   1088:              failure = 1;
                   1089:              goto fail;
                   1090:            }
                   1091:        }
                   1092:       fclose (ostream);
                   1093:       if (feof (istream))
                   1094:        break;
                   1095:     }
                   1096:
                   1097:   free (lb.buffer);
                   1098:
                   1099: fail:
                   1100:   /* Record number of temp files we actually needed.  */
                   1101:
                   1102:   ntemps = i;
                   1103:
                   1104:   /* Sort each tempfile into another tempfile.
                   1105:     Delete the first set of tempfiles and put the names of the second
                   1106:     into `tempfiles'. */
                   1107:
                   1108:   for (i = 0; i < ntemps; i++)
                   1109:     {
                   1110:       char *newtemp = maketempname (++tempcount);
                   1111:       sort_in_core (&tempfiles[i], MAX_IN_CORE_SORT, newtemp);
                   1112:       if (!keep_tempfiles)
                   1113:        unlink (tempfiles[i]);
                   1114:       tempfiles[i] = newtemp;
                   1115:     }
                   1116:
                   1117:   if (failure)
                   1118:     return;
                   1119:
                   1120:   /* Merge the tempfiles together and indexify. */
                   1121:
                   1122:   merge_files (tempfiles, ntemps, outfile);
                   1123: }
                   1124: 
                   1125: /* Sort INFILE, whose size is TOTAL,
                   1126:    assuming that is small enough to be done in-core,
                   1127:    then indexify it and send the output to OUTFILE (or to stdout).  */
                   1128:
                   1129: void
                   1130: sort_in_core (infile, total, outfile)
                   1131:      char *infile;
                   1132:      long total;
                   1133:      char *outfile;
                   1134: {
                   1135:   u_short **nextline;
                   1136:   u_short *data = (u_short *) xmalloc ((total + 1)*sizeof(u_short));
                   1137:   u_short *file_data;
                   1138:   long file_size;
                   1139:   int i;
                   1140:   FILE *ostream = stdout;
                   1141:   struct lineinfo *lineinfo;
                   1142:   FILE *ifp;
                   1143:   int  c;
                   1144:
                   1145:   /* Read the contents of the file into the moby array `data'. */
                   1146:   ifp  = fopen(infile, "r");
                   1147:   if (!ifp)
                   1148:     fatal ("failure reopening %s", infile);
                   1149:   memset(data, 0, (total + 1)*sizeof(u_short));        /* zero clear */
                   1150:   for (i = 0, c = fgetc(ifp); c != EOF; c = fgetc(ifp), i++)
                   1151:       {
                   1152:       if (EUC_BYTE(c))
                   1153:          {
                   1154:          data[i] = (c & 0x007F) << 8;
                   1155:          c = fgetc(ifp) & 0x007F;
                   1156:          }
                   1157:       data[i]  += c;
                   1158:       }
                   1159:   file_size = i;
                   1160:   file_data = data;
                   1161:   data[file_size] = 0;
                   1162:
                   1163:   fclose(ifp);
                   1164:
                   1165:   if (file_size > 0 && data[0] != '\\' && data[0] != '@')
                   1166:     {
                   1167:       error ("%s: not a texinfo index file", infile);
                   1168:       return;
                   1169:     }
                   1170:
                   1171:   init_char_order ();
                   1172:
                   1173:   /* Sort routines want to know this address. */
                   1174:
                   1175:   text_base = data;
                   1176:
                   1177:   /* Create the array of pointers to lines, with a default size
                   1178:      frequently enough.  */
                   1179:
                   1180:   nlines = total / 50;
                   1181:   if (!nlines)
                   1182:     nlines = 2;
                   1183:   linearray = (u_short **) xmalloc (nlines * sizeof (u_short *));
                   1184:
                   1185:   /* `nextline' points to the next free slot in this array.
                   1186:      `nlines' is the allocated size.  */
                   1187:
                   1188:   nextline = linearray;
                   1189:
                   1190:   /* Parse the input file's data, and make entries for the lines.  */
                   1191:
                   1192:   nextline = parsefile (nextline, file_data, file_size);
                   1193:
                   1194:   if (nextline == 0)
                   1195:     {
                   1196:       error ("%s: not a texinfo index file", infile);
                   1197:       return;
                   1198:     }
                   1199:
                   1200:   /* Sort the lines. */
                   1201:
                   1202:   /* If we have enough space, find the first keyfield of each line in advance.
                   1203:      Make a `struct lineinfo' for each line, which records the keyfield
                   1204:      as well as the line, and sort them.  */
                   1205:
                   1206:   lineinfo = (struct lineinfo *) malloc ((nextline - linearray) * sizeof (struct lineinfo));
                   1207:
                   1208:   if (lineinfo)
                   1209:     {
                   1210:       struct lineinfo *lp;
                   1211:       u_short **p;
                   1212:
                   1213:       for (lp = lineinfo, p = linearray; p != nextline; lp++, p++)
                   1214:        {
                   1215:          lp->text = *p;
                   1216:          lp->key.text = find_field (keyfields, *p, &lp->keylen);
                   1217:          if (keyfields->numeric)
                   1218:            lp->key.number = find_value (lp->key.text, lp->keylen);
                   1219:        }
                   1220:
                   1221:       qsort (lineinfo, nextline - linearray, sizeof (struct lineinfo), compare_prepared);
                   1222:
                   1223:       for (lp = lineinfo, p = linearray; p != nextline; lp++, p++)
                   1224:          *p = lp->text;
                   1225:
                   1226:       free (lineinfo);
                   1227:     }
                   1228:   else
                   1229:     qsort (linearray, nextline - linearray, sizeof (u_short *), compare_full);
                   1230:
                   1231:   /* Open the output file. */
                   1232:
                   1233:   if (outfile)
                   1234:     {
                   1235:       ostream = fopen (outfile, "w");
                   1236:       if (!ostream)
                   1237:        pfatal_with_name (outfile);
                   1238:     }
                   1239:
                   1240:   writelines (linearray, nextline - linearray, ostream);
                   1241:   if (outfile)
                   1242:     fclose (ostream);
                   1243:
                   1244:   free (linearray);
                   1245:   free (data);
                   1246: }
                   1247: 
                   1248: /* Parse an input string in core into lines.
                   1249:    DATA is the input string, and SIZE is its length.
                   1250:    Data goes in LINEARRAY starting at NEXTLINE.
                   1251:    The value returned is the first entry in LINEARRAY still unused.
                   1252:    Value 0 means input file contents are invalid.  */
                   1253:
                   1254: u_short **
                   1255: parsefile (nextline, data, size)
                   1256:      u_short **nextline;
                   1257:      u_short *data;
                   1258:      long size;
                   1259: {
                   1260:   u_short *p, *end;
                   1261:   u_short **line = nextline;
                   1262:
                   1263:   p = data;
                   1264:   end = p + size;
                   1265:   *end = 0;
                   1266:
                   1267:   while (p != end)
                   1268:     {
                   1269:       if (p[0] != '\\' && p[0] != '@')
                   1270:        return 0;
                   1271:
                   1272:       *line = p;
                   1273:       while (*p && *p != '\n')
                   1274:        p++;
                   1275:       if (p != end)
                   1276:        p++;
                   1277:
                   1278:       line++;
                   1279:       if (line == linearray + nlines)
                   1280:        {
                   1281:          u_short **old = linearray;
                   1282:          linearray =
                   1283:              (u_short **) xrealloc (linearray,
                   1284:                                     sizeof (u_short *) * (nlines *= 4));
                   1285:          line += linearray - old;
                   1286:        }
                   1287:     }
                   1288:
                   1289:   return line;
                   1290: }
                   1291: 
                   1292: /* Indexification is a filter applied to the sorted lines
                   1293:    as they are being written to the output file.
                   1294:    Multiple entries for the same name, with different page numbers,
                   1295:    get combined into a single entry with multiple page numbers.
                   1296:    The first braced field, which is used for sorting, is discarded.
                   1297:    However, its first character is examined, folded to lower case,
                   1298:    and if it is different from that in the previous line fed to us
                   1299:    a \initial line is written with one argument, the new initial.
                   1300:
                   1301:    If an entry has four braced fields, then the second and third
                   1302:    constitute primary and secondary names.
                   1303:    In this case, each change of primary name
                   1304:    generates a \primary line which contains only the primary name,
                   1305:    and in between these are \secondary lines which contain
                   1306:    just a secondary name and page numbers. */
                   1307:
                   1308: /* The last primary name we wrote a \primary entry for.
                   1309:    If only one level of indexing is being done, this is the last name seen. */
                   1310: u_short *lastprimary;
                   1311: /* Length of storage allocated for lastprimary. */
                   1312: int lastprimarylength;
                   1313:
                   1314: /* Similar, for the secondary name. */
                   1315: u_short *lastsecondary;
                   1316: int lastsecondarylength;
                   1317:
                   1318: /* Zero if we are not in the middle of writing an entry.
                   1319:    One if we have written the beginning of an entry but have not
                   1320:    yet written any page numbers into it.
                   1321:    Greater than one if we have written the beginning of an entry
                   1322:    plus at least one page number. */
                   1323: int pending;
                   1324:
                   1325: /* The initial (for sorting purposes) of the last primary entry written.
                   1326:    When this changes, a \initial {c} line is written */
                   1327:
                   1328: u_short *lastinitial;
                   1329:
                   1330: int lastinitiallength;
                   1331:
                   1332: /* When we need a string of length 1 for the value of lastinitial,
                   1333:    store it here.  */
                   1334:
                   1335: u_short lastinitial1[2];
                   1336:
                   1337: /* Initialize static storage for writing an index. */
                   1338:
                   1339: void
                   1340: init_index ()
                   1341: {
                   1342:   pending = 0;
                   1343:   lastinitial = lastinitial1;
                   1344:   lastinitial1[0] = 0;
                   1345:   lastinitial1[1] = 0;
                   1346:   lastinitiallength = 0;
                   1347:   lastprimarylength = 100;
                   1348:   lastprimary = (u_short *) xmalloc ((lastprimarylength + 1)*sizeof(u_short));
                   1349:   memset (lastprimary, '\0', (lastprimarylength + 1)*sizeof(u_short));
                   1350:   lastsecondarylength = 100;
                   1351:   lastsecondary =
                   1352:       (u_short *) xmalloc ((lastsecondarylength + 1)*sizeof(u_short));
                   1353:   memset (lastsecondary, '\0', (lastsecondarylength + 1)*sizeof(u_short));
                   1354: }
                   1355:
                   1356: /* Indexify.  Merge entries for the same name,
                   1357:    insert headers for each initial character, etc.  */
                   1358:
                   1359: void
                   1360: indexify (u_short *line, FILE *ostream)
                   1361: {
                   1362:   u_short *primary, *secondary, *pagenumber;
                   1363:   int primarylength, secondarylength = 0, pagelength;
                   1364:   int nosecondary;
                   1365:   int initiallength;
                   1366:   u_short *initial;
                   1367:   u_short initial1[2];
                   1368:   register u_short *p;
                   1369:
                   1370:   /* First, analyze the parts of the entry fed to us this time. */
                   1371:
                   1372:   p = find_braced_pos (line, 0, 0, 0);
                   1373:   if (*p == '{')
                   1374:     {
                   1375:       initial = p;
                   1376:       /* Get length of inner pair of braces starting at `p',
                   1377:         including that inner pair of braces.  */
                   1378:       initiallength = find_braced_end (p + 1) + 1 - p;
                   1379:     }
                   1380:   else
                   1381:     {
                   1382:       initial = initial1;
                   1383:       initial1[0] = *p & 0x7F7F;
                   1384:       initial1[1] = 0;
                   1385:       initiallength = 1;
                   1386:
                   1387:       if (initial1[0] >= 'a' && initial1[0] <= 'z')
                   1388:        initial1[0] -= ' ';
                   1389:       else if (initial1[0] > 0x2000)
                   1390:        initial1[0] = char_order[initial1[0]];
                   1391:     }
                   1392:
                   1393:   pagenumber = find_braced_pos (line, 1, 0, 0);
                   1394:   pagelength = find_braced_end (pagenumber) - pagenumber;
                   1395:   if (pagelength == 0)
                   1396:     abort ();
                   1397:
                   1398:   primary = find_braced_pos (line, 2, 0, 0);
                   1399:   primarylength = find_braced_end (primary) - primary;
                   1400:
                   1401:   secondary = find_braced_pos (line, 3, 0, 0);
                   1402:   nosecondary = !*secondary;
                   1403:   if (!nosecondary)
                   1404:     secondarylength = find_braced_end (secondary) - secondary;
                   1405:
                   1406:   /* If the primary is different from before, make a new primary entry. */
                   1407:   if (wstrncmp (primary, lastprimary, primarylength))
                   1408:     {
                   1409:       /* Close off current secondary entry first, if one is open. */
                   1410:       if (pending)
                   1411:        {
                   1412:          fputs ("}\n", ostream);
                   1413:          pending = 0;
                   1414:        }
                   1415:
                   1416:       /* If this primary has a different initial, include an entry for
                   1417:         the initial. */
                   1418:       if (initiallength != lastinitiallength ||
                   1419:          (*initial && wstrncmp (initial, lastinitial, initiallength)))
                   1420:        {
                   1421:          fprintf (ostream, "\\initial {");
                   1422:          wfwrite (initial, initiallength, 1, ostream);
                   1423:          fprintf (ostream, "}\n", initial);
                   1424:          if (initial == initial1)
                   1425:            {
                   1426:              lastinitial = lastinitial1;
                   1427:              *lastinitial1 = *initial1;
                   1428:            }
                   1429:          else
                   1430:            {
                   1431:              lastinitial = initial;
                   1432:            }
                   1433:          lastinitiallength = initiallength;
                   1434:        }
                   1435:
                   1436:       /* Make the entry for the primary.  */
                   1437:       if (nosecondary)
                   1438:        fputs ("\\entry {", ostream);
                   1439:       else
                   1440:        fputs ("\\primary {", ostream);
                   1441:       wfwrite (primary, primarylength, 1, ostream);
                   1442:       if (nosecondary)
                   1443:        {
                   1444:          fputs ("}{", ostream);
                   1445:          pending = 1;
                   1446:        }
                   1447:       else
                   1448:        fputs ("}\n", ostream);
                   1449:
                   1450:       /* Record name of most recent primary. */
                   1451:       if (lastprimarylength < primarylength)
                   1452:        {
                   1453:          lastprimarylength = primarylength + 100;
                   1454:          lastprimary =
                   1455:              (u_short *) xrealloc (lastprimary,
                   1456:                                    (1 + lastprimarylength)*sizeof(u_short));
                   1457:        }
                   1458:       memcpy (lastprimary, primary, primarylength*sizeof(u_short));
                   1459:       lastprimary[primarylength] = 0;
                   1460:
                   1461:       /* There is no current secondary within this primary, now. */
                   1462:       lastsecondary[0] = 0;
                   1463:     }
                   1464:
                   1465:   /* Should not have an entry with no subtopic following one with a subtopic. */
                   1466:
                   1467:   if (nosecondary && *lastsecondary)
                   1468:       {
                   1469:       char  tline[2001];
                   1470:       int   len;
                   1471:
                   1472:       len   = wtoc(tline, line, 2000);         /* 2000 is much enough ? */
                   1473:       tline[len]   = '\0';
                   1474:       error ("entry %s follows an entry with a secondary name", tline);
                   1475:       }
                   1476:
                   1477:   /* Start a new secondary entry if necessary. */
                   1478:   if (!nosecondary && wstrncmp (secondary, lastsecondary, secondarylength))
                   1479:     {
                   1480:       if (pending)
                   1481:        {
                   1482:          fputs ("}\n", ostream);
                   1483:          pending = 0;
                   1484:        }
                   1485:
                   1486:       /* Write the entry for the secondary.  */
                   1487:       fputs ("\\secondary {", ostream);
                   1488:       wfwrite (secondary, secondarylength, 1, ostream);
                   1489:       fputs ("}{", ostream);
                   1490:       pending = 1;
                   1491:
                   1492:       /* Record name of most recent secondary. */
                   1493:       if (lastsecondarylength < secondarylength)
                   1494:        {
                   1495:          lastsecondarylength = secondarylength + 100;
                   1496:          lastsecondary =
                   1497:              (u_short *) xrealloc ((lastsecondary,
                   1498:                                     1 + lastsecondarylength)*sizeof(u_short));
                   1499:        }
                   1500:       memcpy (lastsecondary, secondary, secondarylength * sizeof(u_short));
                   1501:       lastsecondary[secondarylength] = 0;
                   1502:     }
                   1503:
                   1504:   /* Here to add one more page number to the current entry. */
                   1505:   if (pending++ != 1)
                   1506:     fputs (", ", ostream);     /* Punctuate first, if this is not the first. */
                   1507:   wfwrite (pagenumber, pagelength, 1, ostream);
                   1508: }
                   1509:
                   1510: /* Close out any unfinished output entry. */
                   1511:
                   1512: void
                   1513: finish_index (ostream)
                   1514:      FILE *ostream;
                   1515: {
                   1516:   if (pending)
                   1517:     fputs ("}\n", ostream);
                   1518:   free (lastprimary);
                   1519:   free (lastsecondary);
                   1520: }
                   1521: 
                   1522: /* Copy the lines in the sorted order.
                   1523:    Each line is copied out of the input file it was found in. */
                   1524:
                   1525: void
                   1526: writelines (linearray, nlines, ostream)
                   1527:      u_short **linearray;
                   1528:      int nlines;
                   1529:      FILE *ostream;
                   1530: {
                   1531:   u_short **stop_line = linearray + nlines;
                   1532:   u_short **next_line;
                   1533:
                   1534:   init_index ();
                   1535:
                   1536:   /* Output the text of the lines, and free the buffer space. */
                   1537:
                   1538:   for (next_line = linearray; next_line != stop_line; next_line++)
                   1539:     {
                   1540:       /* If -u was specified, output the line only if distinct from previous one.  */
                   1541:       if (next_line == linearray
                   1542:       /* Compare previous line with this one, using only the
                   1543:          explicitly specd keyfields. */
                   1544:          || compare_general (*(next_line - 1), *next_line, 0L, 0L, num_keyfields - 1))
                   1545:        {
                   1546:          u_short *p = *next_line;
                   1547:          u_short c;
                   1548:
                   1549:          while ((c = *p++) && c != '\n')
                   1550:            /* Do nothing. */ ;
                   1551:          *(p - 1) = 0;
                   1552:          indexify (*next_line, ostream);
                   1553:        }
                   1554:     }
                   1555:
                   1556:   finish_index (ostream);
                   1557: }
                   1558: 
                   1559: /* Assume (and optionally verify) that each input file is sorted;
                   1560:    merge them and output the result.
                   1561:    Returns nonzero if any input file fails to be sorted.
                   1562:
                   1563:    This is the high-level interface that can handle an unlimited
                   1564:    number of files.  */
                   1565:
                   1566: #define MAX_DIRECT_MERGE 10
                   1567:
                   1568: int
                   1569: merge_files (infiles, nfiles, outfile)
                   1570:      char **infiles;
                   1571:      int nfiles;
                   1572:      char *outfile;
                   1573: {
                   1574:   char **tempfiles;
                   1575:   int ntemps;
                   1576:   int i;
                   1577:   int value = 0;
                   1578:   int start_tempcount = tempcount;
                   1579:
                   1580:   if (nfiles <= MAX_DIRECT_MERGE)
                   1581:     return merge_direct (infiles, nfiles, outfile);
                   1582:
                   1583:   /* Merge groups of MAX_DIRECT_MERGE input files at a time,
                   1584:      making a temporary file to hold each group's result.  */
                   1585:
                   1586:   ntemps = (nfiles + MAX_DIRECT_MERGE - 1) / MAX_DIRECT_MERGE;
                   1587:   tempfiles = (char **) xmalloc (ntemps * sizeof (char *));
                   1588:   for (i = 0; i < ntemps; i++)
                   1589:     {
                   1590:       int nf = MAX_DIRECT_MERGE;
                   1591:       if (i + 1 == ntemps)
                   1592:        nf = nfiles - i * MAX_DIRECT_MERGE;
                   1593:       tempfiles[i] = maketempname (++tempcount);
                   1594:       value |= merge_direct (&infiles[i * MAX_DIRECT_MERGE], nf, tempfiles[i]);
                   1595:     }
                   1596:
                   1597:   /* All temporary files that existed before are no longer needed
                   1598:      since their contents have been merged into our new tempfiles.
                   1599:      So delete them.  */
                   1600:   flush_tempfiles (start_tempcount);
                   1601:
                   1602:   /* Now merge the temporary files we created.  */
                   1603:
                   1604:   merge_files (tempfiles, ntemps, outfile);
                   1605:
                   1606:   free (tempfiles);
                   1607:
                   1608:   return value;
                   1609: }
                   1610: 
                   1611: /* Assume (and optionally verify) that each input file is sorted;
                   1612:    merge them and output the result.
                   1613:    Returns nonzero if any input file fails to be sorted.
                   1614:
                   1615:    This version of merging will not work if the number of
                   1616:    input files gets too high.  Higher level functions
                   1617:    use it only with a bounded number of input files.  */
                   1618:
                   1619: int
                   1620: merge_direct (infiles, nfiles, outfile)
                   1621:      char **infiles;
                   1622:      int nfiles;
                   1623:      char *outfile;
                   1624: {
                   1625:   struct linebuffer *lb1, *lb2;
                   1626:   struct linebuffer **thisline, **prevline;
                   1627:   FILE **streams;
                   1628:   int i;
                   1629:   int nleft;
                   1630:   int lossage = 0;
                   1631:   int *file_lossage;
                   1632:   struct linebuffer *prev_out = 0;
                   1633:   FILE *ostream = stdout;
                   1634:
                   1635:   if (outfile)
                   1636:     {
                   1637:       ostream = fopen (outfile, "w");
                   1638:     }
                   1639:   if (!ostream)
                   1640:     pfatal_with_name (outfile);
                   1641:
                   1642:   init_index ();
                   1643:
                   1644:   if (nfiles == 0)
                   1645:     {
                   1646:       if (outfile)
                   1647:        fclose (ostream);
                   1648:       return 0;
                   1649:     }
                   1650:
                   1651:   /* For each file, make two line buffers.
                   1652:      Also, for each file, there is an element of `thisline'
                   1653:      which points at any time to one of the file's two buffers,
                   1654:      and an element of `prevline' which points to the other buffer.
                   1655:      `thisline' is supposed to point to the next available line from the file,
                   1656:      while `prevline' holds the last file line used,
                   1657:      which is remembered so that we can verify that the file is properly sorted. */
                   1658:
                   1659:   /* lb1 and lb2 contain one buffer each per file. */
                   1660:   lb1 = (struct linebuffer *) xmalloc (nfiles * sizeof (struct linebuffer));
                   1661:   lb2 = (struct linebuffer *) xmalloc (nfiles * sizeof (struct linebuffer));
                   1662:
                   1663:   /* thisline[i] points to the linebuffer holding the next available line in file i,
                   1664:      or is zero if there are no lines left in that file.  */
                   1665:   thisline = (struct linebuffer **)
                   1666:     xmalloc (nfiles * sizeof (struct linebuffer *));
                   1667:   /* prevline[i] points to the linebuffer holding the last used line
                   1668:      from file i.  This is just for verifying that file i is properly
                   1669:      sorted.  */
                   1670:   prevline = (struct linebuffer **)
                   1671:     xmalloc (nfiles * sizeof (struct linebuffer *));
                   1672:   /* streams[i] holds the input stream for file i.  */
                   1673:   streams = (FILE **) xmalloc (nfiles * sizeof (FILE *));
                   1674:   /* file_lossage[i] is nonzero if we already know file i is not
                   1675:      properly sorted.  */
                   1676:   file_lossage = (int *) xmalloc (nfiles * sizeof (int));
                   1677:
                   1678:   /* Allocate and initialize all that storage. */
                   1679:
                   1680:   for (i = 0; i < nfiles; i++)
                   1681:     {
                   1682:       initbuffer (&lb1[i]);
                   1683:       initbuffer (&lb2[i]);
                   1684:       thisline[i] = &lb1[i];
                   1685:       prevline[i] = &lb2[i];
                   1686:       file_lossage[i] = 0;
                   1687:       streams[i] = fopen (infiles[i], "r");
                   1688:       if (!streams[i])
                   1689:        pfatal_with_name (infiles[i]);
                   1690:
                   1691:       readline (thisline[i], streams[i]);
                   1692:     }
                   1693:
                   1694:   /* Keep count of number of files not at eof. */
                   1695:   nleft = nfiles;
                   1696:
                   1697:   while (nleft)
                   1698:     {
                   1699:       struct linebuffer *best = 0;
                   1700:       struct linebuffer *exch;
                   1701:       int bestfile = -1;
                   1702:       int i;
                   1703:
                   1704:       /* Look at the next avail line of each file; choose the least one.  */
                   1705:
                   1706:       for (i = 0; i < nfiles; i++)
                   1707:        {
                   1708:          if (thisline[i] &&
                   1709:              (!best ||
                   1710:               0 < compare_general (best->buffer, thisline[i]->buffer,
                   1711:                                 (long) bestfile, (long) i, num_keyfields)))
                   1712:            {
                   1713:              best = thisline[i];
                   1714:              bestfile = i;
                   1715:            }
                   1716:        }
                   1717:
                   1718:       /* Output that line, unless it matches the previous one and we
                   1719:         don't want duplicates. */
                   1720:
                   1721:       if (!(prev_out &&
                   1722:            !compare_general (prev_out->buffer,
                   1723:                              best->buffer, 0L, 1L, num_keyfields - 1)))
                   1724:        indexify (best->buffer, ostream);
                   1725:       prev_out = best;
                   1726:
                   1727:       /* Now make the line the previous of its file, and fetch a new
                   1728:         line from that file.  */
                   1729:
                   1730:       exch = prevline[bestfile];
                   1731:       prevline[bestfile] = thisline[bestfile];
                   1732:       thisline[bestfile] = exch;
                   1733:
                   1734:       while (1)
                   1735:        {
                   1736:          /* If the file has no more, mark it empty. */
                   1737:
                   1738:          if (feof (streams[bestfile]))
                   1739:            {
                   1740:              thisline[bestfile] = 0;
                   1741:              /* Update the number of files still not empty. */
                   1742:              nleft--;
                   1743:              break;
                   1744:            }
                   1745:          readline (thisline[bestfile], streams[bestfile]);
                   1746:          if (thisline[bestfile]->buffer[0] || !feof (streams[bestfile]))
                   1747:            break;
                   1748:        }
                   1749:     }
                   1750:
                   1751:   finish_index (ostream);
                   1752:
                   1753:   /* Free all storage and close all input streams. */
                   1754:
                   1755:   for (i = 0; i < nfiles; i++)
                   1756:     {
                   1757:       fclose (streams[i]);
                   1758:       free (lb1[i].buffer);
                   1759:       free (lb2[i].buffer);
                   1760:     }
                   1761:   free (file_lossage);
                   1762:   free (lb1);
                   1763:   free (lb2);
                   1764:   free (thisline);
                   1765:   free (prevline);
                   1766:   free (streams);
                   1767:
                   1768:   if (outfile)
                   1769:     fclose (ostream);
                   1770:
                   1771:   return lossage;
                   1772: }
                   1773: 
                   1774: /* Print error message and exit.  */
                   1775:
                   1776: void
                   1777: fatal (format, arg)
                   1778:      char *format, *arg;
                   1779: {
                   1780:   error (format, arg);
                   1781:   exit (TI_FATAL_ERROR);
                   1782: }
                   1783:
                   1784: /* Print error message.  FORMAT is printf control string, ARG is arg for it. */
                   1785: void
                   1786: error (format, arg)
                   1787:      char *format, *arg;
                   1788: {
                   1789:   printf ("%s: ", program_name);
                   1790:   printf (format, arg);
                   1791:   if (format[strlen (format) -1] != '\n')
                   1792:     printf ("\n");
                   1793: }
                   1794:
                   1795: void
                   1796: perror_with_name (name)
                   1797:      char *name;
                   1798: {
                   1799:   char *s;
                   1800:
                   1801:   s = strerror (errno);
                   1802:   printf ("%s: ", program_name);
                   1803:   printf ("%s; for file `%s'.\n", s, name);
                   1804: }
                   1805:
                   1806: void
                   1807: pfatal_with_name (name)
                   1808:      char *name;
                   1809: {
                   1810:   char *s;
                   1811:
                   1812:   s = strerror (errno);
                   1813:   printf ("%s: ", program_name);
                   1814:   printf ("%s; for file `%s'.\n", s, name);
                   1815:   exit (TI_FATAL_ERROR);
                   1816: }
                   1817:
                   1818: /* Return a newly-allocated string whose contents concatenate those of
                   1819:    S1, S2, S3.  */
                   1820:
                   1821: char *
                   1822: concat (s1, s2, s3)
                   1823:      char *s1, *s2, *s3;
                   1824: {
                   1825:   int len1 = strlen (s1), len2 = strlen (s2), len3 = strlen (s3);
                   1826:   char *result = (char *) xmalloc (len1 + len2 + len3 + 1);
                   1827:
                   1828:   strcpy (result, s1);
                   1829:   strcpy (result + len1, s2);
                   1830:   strcpy (result + len1 + len2, s3);
                   1831:   *(result + len1 + len2 + len3) = 0;
                   1832:
                   1833:   return result;
                   1834: }
                   1835:
                   1836: #if !defined (HAVE_STRERROR)
                   1837: extern char *sys_errlist[];
                   1838: extern int sys_nerr;
                   1839:
                   1840: char *
                   1841: strerror (num)
                   1842:      int num;
                   1843: {
                   1844:   if (num >= sys_nerr)
                   1845:     return ("");
                   1846:   else
                   1847:     return (sys_errlist[num]);
                   1848: }
                   1849: #endif /* !HAVE_STRERROR */
                   1850:
                   1851: #if !defined (HAVE_STRCHR)
                   1852: char *
                   1853: strrchr (string, character)
                   1854:      char *string;
                   1855:      int character;
                   1856: {
                   1857:   register int i;
                   1858:
                   1859:   for (i = strlen (string) - 1; i > -1; i--)
                   1860:     if (string[i] == character)
                   1861:       return (string + i);
                   1862:
                   1863:   return ((char *)NULL);
                   1864: }
                   1865: #endif /* HAVE_STRCHR */
                   1866:
                   1867: /* Just like malloc, but kills the program in case of fatal error. */
                   1868: void *
                   1869: xmalloc (nbytes)
                   1870:      int nbytes;
                   1871: {
                   1872:   void *temp = (void *) malloc (nbytes);
                   1873:
                   1874:   if (nbytes && temp == (void *)NULL)
                   1875:     memory_error ("xmalloc", nbytes);
                   1876:
                   1877:   return (temp);
                   1878: }
                   1879:
                   1880: /* Like realloc (), but barfs if there isn't enough memory. */
                   1881: void *
                   1882: xrealloc (pointer, nbytes)
                   1883:      void *pointer;
                   1884:      int nbytes;
                   1885: {
                   1886:   void *temp;
                   1887:
                   1888:   if (!pointer)
                   1889:     temp = (void *)xmalloc (nbytes);
                   1890:   else
                   1891:     temp = (void *)realloc (pointer, nbytes);
                   1892:
                   1893:   if (nbytes && !temp)
                   1894:     memory_error ("xrealloc", nbytes);
                   1895:
                   1896:   return (temp);
                   1897: }
                   1898:
                   1899: void
                   1900: memory_error (callers_name, bytes_wanted)
                   1901:      char *callers_name;
                   1902:      int bytes_wanted;
                   1903: {
                   1904:   char printable_string[80];
                   1905:
                   1906:   sprintf (printable_string,
                   1907:           "Virtual memory exhausted in %s ()!  Needed %d bytes.",
                   1908:           callers_name, bytes_wanted);
                   1909:
                   1910:   error (printable_string);
                   1911:   abort();
                   1912: }
                   1913:
                   1914: 
                   1915: static int
                   1916: wstrncmp(const u_short *s1, const u_short *s2, size_t n)
                   1917:     {
                   1918:     int            i;
                   1919:     int            d = 0;
                   1920:
                   1921:     for (i=0; i<n && d == 0; i++)
                   1922:        d   = s1[i] - s2[i];
                   1923:
                   1924:     return  d;
                   1925:     }
                   1926:
                   1927:
                   1928: static int
                   1929: wfwrite(u_short *ptr, size_t  size,  size_t  nmemb,  FILE*stream)
                   1930:     {
                   1931:     char    *cptr;
                   1932:     int            ret, len;
                   1933:
                   1934:     cptr    = (char *)xmalloc(size * nmemb * sizeof(u_short)); /* 丼一杯! */
                   1935:     len        = wtoc(cptr, ptr, size*nmemb);
                   1936:     assert(len <= (size * nmemb * sizeof(u_short)));
                   1937:
                   1938:     ret        = fwrite(cptr, len, nmemb, stream);
                   1939:     free(cptr);
                   1940:     return  ret;
                   1941:     }
                   1942:
                   1943:
                   1944: static int
                   1945: wtoc(char *cptr, u_short const *ptr, int len)
                   1946:     {
                   1947:     char       *tptr;
                   1948:     u_short        c;
                   1949:     int                    i;
                   1950:
                   1951:     tptr    = cptr;
                   1952:     for (i = 0; i < len; i++)
                   1953:        {
                   1954:        c   = ptr[i];
                   1955:        if (G1_CHAR(c))
                   1956:            {
                   1957:            *tptr++ = c >> 8 | 0x0080;  /* to EUC */
                   1958:            c   &= 0x007F;
                   1959:            c   |= 0x0080;
                   1960:            }
                   1961:        *tptr++ = c;
                   1962:        }
                   1963:
                   1964:     return  tptr - cptr;
                   1965:     }
                   1966:
                   1967:
                   1968: static int
                   1969: Vowel_of(int c)
                   1970:     {
                   1971:     if (0x2521 <= c && c < 0x2577)             /* 片仮名は */
                   1972:        c   -= 0x100;                           /* 平仮名へ */
                   1973:     /* ヴ、ヵ、ヶ、の3字は相当する平仮名はないが、コードがあると仮定する */
                   1974:     /* 普通は問題にならない筈。        */
                   1975:
                   1976:     if (0x2421 <= c && c < 0x2474)
                   1977:        return  Vowel_Table[c - 0x2420];
                   1978:     else
                   1979:        return  c;                              /* 平仮名以外はそのまま */
                   1980:     }
                   1981:
                   1982:
                   1983: static void
                   1984: init_Vowel_Table(void)
                   1985:     {
                   1986:     int            i;
                   1987:
                   1988:     /* あ〜ぞ */
                   1989:     for (i = 0x2421 - 0x2420; i < 0x2445 - 0x2420; i += 10)
                   1990:        {
                   1991:        Vowel_Table[i]      = Vowel_Table[i+1]    = 0x2422; /* あ段 */
                   1992:        Vowel_Table[i+2]    = Vowel_Table[i+3]    = 0x2424; /* い段 */
                   1993:        Vowel_Table[i+4]    = Vowel_Table[i+5]    = 0x2426; /* う段 */
                   1994:        Vowel_Table[i+6]    = Vowel_Table[i+7]    = 0x2428; /* え段 */
                   1995:        Vowel_Table[i+8]    = Vowel_Table[i+9]    = 0x242A; /* お段 */
                   1996:        }
                   1997:     Vowel_Table[0x243F - 0x2420]    = 0x2422;  /* た */
                   1998:     Vowel_Table[0x2440 - 0x2420]    = 0x2422;  /* だ */
                   1999:     Vowel_Table[0x2441 - 0x2420]    = 0x2424;  /* ち */
                   2000:     Vowel_Table[0x2442 - 0x2420]    = 0x2424;  /* ぢ */
                   2001:     Vowel_Table[0x2443 - 0x2420]    = 0x2426;  /* っ */
                   2002:     Vowel_Table[0x2444 - 0x2420]    = 0x2426;  /* つ */
                   2003:     Vowel_Table[0x2445 - 0x2420]    = 0x2426;  /* づ */
                   2004:     Vowel_Table[0x2446 - 0x2420]    = 0x2428;  /* て */
                   2005:     Vowel_Table[0x2447 - 0x2420]    = 0x2428;  /* で */
                   2006:     Vowel_Table[0x2448 - 0x2420]    = 0x242A;  /* と */
                   2007:     Vowel_Table[0x2449 - 0x2420]    = 0x242A;  /* ど */
                   2008:     Vowel_Table[0x244A - 0x2420]    = 0x2422;  /* な */
                   2009:     Vowel_Table[0x244B - 0x2420]    = 0x2424;  /* に */
                   2010:     Vowel_Table[0x244C - 0x2420]    = 0x2426;  /* ぬ */
                   2011:     Vowel_Table[0x244D - 0x2420]    = 0x2428;  /* ね */
                   2012:     Vowel_Table[0x244E - 0x2420]    = 0x242A;  /* の */
                   2013:     /* は〜ぽ */
                   2014:     for (i = 0x244F - 0x2420; i < 0x245E - 0x2420; i += 15)
                   2015:        {
                   2016:        Vowel_Table[i]      = Vowel_Table[i+1]  = Vowel_Table[i+2]  = 0x2422;
                   2017:        Vowel_Table[i+3]    = Vowel_Table[i+4]  = Vowel_Table[i+5]  = 0x2424;
                   2018:        Vowel_Table[i+6]    = Vowel_Table[i+7]  = Vowel_Table[i+8]  = 0x2426;
                   2019:        Vowel_Table[i+9]    = Vowel_Table[i+11] = Vowel_Table[i+12] = 0x2428;
                   2020:        Vowel_Table[i+12]   = Vowel_Table[i+13] = Vowel_Table[i+14] = 0x242A;
                   2021:        }
                   2022:     Vowel_Table[0x245E - 0x2420]    = 0x2422;  /* ま */
                   2023:     Vowel_Table[0x245F - 0x2420]    = 0x2424;  /* み */
                   2024:     Vowel_Table[0x2460 - 0x2420]    = 0x2426;  /* む */
                   2025:     Vowel_Table[0x2461 - 0x2420]    = 0x2428;  /* め */
                   2026:     Vowel_Table[0x2462 - 0x2420]    = 0x242A;  /* も */
                   2027:     Vowel_Table[0x2463 - 0x2420]    = 0x2422;  /* ゃ */
                   2028:     Vowel_Table[0x2464 - 0x2420]    = 0x2422;  /* や */
                   2029:     Vowel_Table[0x2465 - 0x2420]    = 0x2426;  /* ゅ */
                   2030:     Vowel_Table[0x2466 - 0x2420]    = 0x2426;  /* ゆ */
                   2031:     Vowel_Table[0x2467 - 0x2420]    = 0x242A;  /* ょ */
                   2032:     Vowel_Table[0x2468 - 0x2420]    = 0x242A;  /* よ */
                   2033:     Vowel_Table[0x2469 - 0x2420]    = 0x2422;  /* ら */
                   2034:     Vowel_Table[0x246A - 0x2420]    = 0x2424;  /* り */
                   2035:     Vowel_Table[0x246B - 0x2420]    = 0x2426;  /* る */
                   2036:     Vowel_Table[0x246C - 0x2420]    = 0x2428;  /* れ */
                   2037:     Vowel_Table[0x246D - 0x2420]    = 0x242A;  /* ろ */
                   2038:     Vowel_Table[0x246E - 0x2420]    = 0x2422;  /* ゎ */
                   2039:     Vowel_Table[0x246F - 0x2420]    = 0x2422;  /* わ */
                   2040:     /* ゐ〜ヶ は、そのまま。母音への変換はしない。 */
                   2041:     for (i = 0x2470 - 0x2420; i < 0x2477 - 0x2420; i++)
                   2042:        Vowel_Table[i]  = i;
                   2043:     }

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>