[BACK]Return to texindex.c CVS log [TXT][DIR] Up to [local] / OpenXM / src / asir-doc / jtexindex / C

Annotation of OpenXM/src/asir-doc/jtexindex/C/texindex.c, Revision 1.2

1.1       noro        1: /* Prepare TeX index dribble output into an actual index.
                      2:
                      3:    Version 1.45
                      4:
                      5:    Copyright (C) 1987, 1991, 1992 Free Software Foundation, Inc.
                      6:
                      7:    This program is free software; you can redistribute it and/or modify
                      8:    it under the terms of the GNU General Public License as published by
                      9:    the Free Software Foundation; either version 2, or (at your option)
                     10:    any later version.
                     11:
                     12:    This program is distributed in the hope that it will be useful,
                     13:    but WITHOUT ANY WARRANTY; without even the implied warranty of
                     14:    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
                     15:    GNU General Public License for more details.
                     16:
                     17:    You should have received a copy of the GNU General Public License
                     18:    along with this program; if not, write to the Free Software
                     19:    Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307. */
                     20:
                     21: /*
                     22:    95.10.18 modified by Nobuyosi KAMEI <CXK03514@niftyserve.or.jp>
                     23:        日本語EUCが処理できるようにした。
                     24:    95.10.19 modified by K. Handa <handa@etl.go.jp>
                     25:        \initial{...} の出力を調整。
                     26: */
                     27: 
                     28:
                     29: #include <stdio.h>
                     30: #include <ctype.h>
                     31: #include <errno.h>
                     32: #include    <assert.h>
                     33: #include "getopt.h"
                     34:
                     35: #define TEXINDEX_VERSION_STRING "GNU Texindex 2.0 for Texinfo release 3.4"
                     36: #define TEXINDEX_MODIFICATION_STRING "(modified for Japanese EUC encoding)"
                     37:
                     38: #if defined (emacs)
                     39: #  include "../src/config.h"
                     40: /* Some s/os.h files redefine these. */
                     41: #  undef read
                     42: #  undef close
                     43: #  undef write
                     44: #  undef open
                     45: #endif
                     46:
1.2     ! ohara      47: #if defined(__MINGW32__)
        !            48: #if !defined(_BSDTYPES_DEFINED)
        !            49: typedef unsigned char u_char;
        !            50: typedef unsigned short u_short;
        !            51: typedef unsigned int u_int;
        !            52: typedef unsigned long u_long;
        !            53: #define _BSDTYPES_DEFINED
        !            54: #endif /* _BSDTYPES_DEFINED */
        !            55: #endif /* __MINGW32__*/
        !            56:
1.1       noro       57: #if defined (HAVE_STRING_H)
                     58: #  include <string.h>
                     59: #endif /* HAVE_STRING_H */
                     60:
                     61: #if !defined (HAVE_STRCHR)
                     62: char *strrchr ();
                     63: #endif /* !HAVE_STRCHR */
                     64:
                     65: #if defined (STDC_HEADERS)
                     66: #  include <stdlib.h>
                     67: #else /* !STDC_HEADERS */
                     68: char *getenv (), *malloc (), *realloc ();
                     69: #endif /* !STDC_HEADERS */
                     70:
                     71: #if defined (HAVE_UNISTD_H)
                     72: #  include <unistd.h>
                     73: #else /* !HAVE_UNISTD_H */
                     74: off_t lseek ();
                     75: #endif /* !HAVE_UNISTD_H */
                     76:
                     77: #if !defined (HAVE_MEMSET)
                     78: #undef memset
                     79: #define memset(ptr, ignore, count) bzero (ptr, count)
                     80: #endif
                     81:
                     82:
                     83: char *mktemp ();
                     84:
                     85: #if defined (VMS)
                     86: #  include <file.h>
                     87: #  define TI_NO_ERROR ((1 << 28) | 1)
                     88: #  define TI_FATAL_ERROR ((1 << 28) | 4)
                     89: #  define unlink delete
                     90: #else /* !VMS */
                     91: #  if defined (HAVE_SYS_FCNTL_H)
                     92: #    include <sys/types.h>
                     93: #    include <sys/fcntl.h>
                     94: #  endif /* HAVE_SYS_FCNTL_H */
                     95:
                     96: #  if defined (_AIX) || !defined (_POSIX_VERSION)
                     97: #    include <sys/file.h>
                     98: #  else /* !AIX && _POSIX_VERSION */
                     99: #    if !defined (HAVE_SYS_FCNTL_H)
                    100: #      include <fcntl.h>
                    101: #    endif /* !HAVE_FCNTL_H */
                    102: #  endif /* !_AIX && _POSIX_VERSION */
                    103: #  define TI_NO_ERROR 0
                    104: #  define TI_FATAL_ERROR 1
                    105: #endif /* !VMS */
                    106:
                    107: #if !defined (SEEK_SET)
                    108: #  define SEEK_SET 0
                    109: #  define SEEK_CUR 1
                    110: #  define SEEK_END 2
                    111: #endif /* !SEEK_SET */
                    112:
                    113: #if !defined (errno)
                    114: extern int errno;
                    115: #endif
                    116: char *strerror ();
                    117:
                    118: /* When sorting in core, this structure describes one line
                    119:    and the position and length of its first keyfield.  */
                    120: struct lineinfo
                    121: {
                    122:   u_short *text;               /* The actual text of the line. */
                    123:   union {
                    124:     u_short *text;     /* The start of the key (for textual comparison). */
                    125:     long number;       /* The numeric value (for numeric comparison). */
                    126:   } key;
                    127:   long keylen;         /* Length of KEY field. */
                    128: };
                    129:
                    130: /* This structure describes a field to use as a sort key. */
                    131: struct keyfield
                    132: {
                    133:   int startwords;      /* Number of words to skip. */
                    134:   int startchars;      /* Number of additional chars to skip. */
                    135:   int endwords;                /* Number of words to ignore at end. */
                    136:   int endchars;                /* Ditto for characters of last word. */
                    137:   char ignore_blanks;  /* Non-zero means ignore spaces and tabs. */
                    138:   char fold_case;      /* Non-zero means case doesn't matter. */
                    139:   char reverse;                /* Non-zero means compare in reverse order. */
                    140:   char numeric;                /* Non-zeros means field is ASCII numeric. */
                    141:   char positional;     /* Sort according to file position. */
                    142:   char braced;         /* Count balanced-braced groupings as fields. */
                    143: };
                    144:
                    145: /* Vector of keyfields to use. */
                    146: struct keyfield keyfields[3];
                    147:
                    148: /* Number of keyfields stored in that vector.  */
                    149: int num_keyfields = 3;
                    150:
                    151: /* Vector of input file names, terminated with a null pointer. */
                    152: char **infiles;
                    153:
                    154: /* Vector of corresponding output file names, or NULL, meaning default it
                    155:    (add an `s' to the end). */
                    156: char **outfiles;
                    157:
                    158: /* Length of `infiles'. */
                    159: int num_infiles;
                    160:
                    161: /* Pointer to the array of pointers to lines being sorted. */
                    162: u_short **linearray;
                    163:
                    164: /* The allocated length of `linearray'. */
                    165: long nlines;
                    166:
                    167: /* Directory to use for temporary files.  On Unix, it ends with a slash.  */
                    168: char *tempdir;
                    169:
                    170: /* Start of filename to use for temporary files.  */
                    171: char *tempbase;
                    172:
                    173: /* Number of last temporary file.  */
                    174: int tempcount;
                    175:
                    176: /* Number of last temporary file already deleted.
                    177:    Temporary files are deleted by `flush_tempfiles' in order of creation.  */
                    178: int last_deleted_tempcount;
                    179:
                    180: /* During in-core sort, this points to the base of the data block
                    181:    which contains all the lines of data.  */
                    182: u_short *text_base;
                    183:
                    184: /* Additional command switches .*/
                    185:
                    186: /* Nonzero means do not delete tempfiles -- for debugging. */
                    187: int keep_tempfiles;
                    188:
                    189: /* The name this program was run with. */
                    190: char *program_name;
                    191:
                    192: /* 仮名文字の母音テーブル */
                    193: int Vowel_Table[0x60];
                    194:
                    195: /* Forward declarations of functions in this file. */
                    196:
                    197: void decode_command ();
                    198: void sort_in_core ();
                    199: void sort_offline ();
                    200: u_short **parsefile ();
                    201: u_short *find_field ();
                    202: u_short *find_pos ();
                    203: long find_value ();
                    204: u_short *find_braced_pos (u_short *str, int words, int chars, int ignore_blanks);
                    205: u_short *find_braced_end ();
                    206: void writelines (u_short **linearray, int nlines, FILE *ostream);
                    207: int compare_field ();
                    208: int compare_full ();
                    209: long readline ();
                    210: int merge_files ();
                    211: int merge_direct ();
                    212: void pfatal_with_name ();
                    213: void fatal ();
                    214: void error ();
                    215: void *xmalloc (), *xrealloc ();
                    216: char *concat ();
                    217: char *maketempname ();
                    218: void flush_tempfiles ();
                    219: char *tempcopy ();
                    220: static void    memory_error();
                    221: static int wstrncmp(const u_short *s1, const u_short *s2, size_t n);
                    222: static int wfwrite(u_short *ptr, size_t  size,  size_t  nmemb,  FILE*stream);
                    223: static int wtoc(char *cptr, u_short const *ptr, int len);
                    224: static int  Vowel_of(int c);
                    225: static void init_Vowel_Table(void);
                    226:
                    227: #define        EUC_BYTE(c) (0x00A1 <= c && c <= 0x00FE)
                    228: #define G1_CHAR(c) (c & 0xFF00)
                    229: 
                    230: #define MAX_IN_CORE_SORT 500000
                    231:
                    232: void
                    233: main (argc, argv)
                    234:      int argc;
                    235:      char **argv;
                    236: {
                    237:   int i;
                    238:
                    239:   tempcount = 0;
                    240:   last_deleted_tempcount = 0;
                    241:
                    242:   program_name = strrchr (argv[0], '/');
                    243:   if (program_name != (char *)NULL)
                    244:     program_name++;
                    245:   else
                    246:     program_name = argv[0];
                    247:
                    248:   /* Describe the kind of sorting to do. */
                    249:   /* The first keyfield uses the first braced field and folds case. */
                    250:   keyfields[0].braced = 1;
                    251:   keyfields[0].fold_case = 1;
                    252:   keyfields[0].endwords = -1;
                    253:   keyfields[0].endchars = -1;
                    254:
                    255:   /* The second keyfield uses the second braced field, numerically. */
                    256:   keyfields[1].braced = 1;
                    257:   keyfields[1].numeric = 1;
                    258:   keyfields[1].startwords = 1;
                    259:   keyfields[1].endwords = -1;
                    260:   keyfields[1].endchars = -1;
                    261:
                    262:   /* The third keyfield (which is ignored while discarding duplicates)
                    263:      compares the whole line. */
                    264:   keyfields[2].endwords = -1;
                    265:   keyfields[2].endchars = -1;
                    266:
                    267:   decode_command (argc, argv);
                    268:
                    269:   tempbase = mktemp (concat ("txiXXXXXX", "", ""));
                    270:
                    271:   /* Process input files completely, one by one.  */
                    272:
                    273:   for (i = 0; i < num_infiles; i++)
                    274:     {
                    275:       int desc;
                    276:       long ptr;
                    277:       char *outfile;
                    278:
                    279:       desc = open (infiles[i], O_RDONLY, 0);
                    280:       if (desc < 0)
                    281:        pfatal_with_name (infiles[i]);
                    282:       lseek (desc, (off_t) 0, SEEK_END);
                    283:       ptr = (long) lseek (desc, (off_t) 0, SEEK_CUR);
                    284:
                    285:       close (desc);
                    286:
                    287:       outfile = outfiles[i];
                    288:       if (!outfile)
                    289:        {
                    290:          outfile = concat (infiles[i], "s", "");
                    291:        }
                    292:
                    293:       if (ptr < MAX_IN_CORE_SORT)
                    294:        /* Sort a small amount of data. */
                    295:        sort_in_core (infiles[i], ptr, outfile);
                    296:       else
                    297:        sort_offline (infiles[i], ptr, outfile);
                    298:     }
                    299:
                    300:   flush_tempfiles (tempcount);
                    301:   exit (TI_NO_ERROR);
                    302: }
                    303: 
                    304: typedef struct
                    305: {
                    306:   char *long_name;
                    307:   char *short_name;
                    308:   int *variable_ref;
                    309:   int variable_value;
                    310:   char *arg_name;
                    311:   char *doc_string;
                    312: } TEXINDEX_OPTION;
                    313:
                    314: TEXINDEX_OPTION texindex_options[] = {
                    315:   { "--keep", "-k", &keep_tempfiles, 1, (char *)NULL,
                    316:       "Keep temporary files around after processing" },
                    317:   { "--no-keep", 0, &keep_tempfiles, 0, (char *)NULL,
                    318:       "Do not keep temporary files around after processing (default)" },
                    319:   { "--output", "-o", (int *)NULL, 0, "FILE",
                    320:       "Send output to FILE" },
                    321:   { "--version", (char *)NULL, (int *)NULL, 0, (char *)NULL,
                    322:       "Show version information" },
                    323:   { "--help", "-h", (int *)NULL, 0, (char *)NULL, "Produce this listing" },
                    324:   { (char *)NULL, (char *)NULL, (int *)NULL, 0, (char *)NULL }
                    325: };
                    326:
                    327: void
                    328: usage (result_value)
                    329:      int result_value;
                    330: {
                    331:   register int i;
                    332:
                    333:   fprintf (stderr, "Usage: %s [OPTIONS] FILE...\n", program_name);
                    334:   fprintf (stderr, "  Generate a permuted index for the TeX files given.\n");
                    335:   fprintf (stderr, "  Usually FILE... is `foo.??' for the source file `foo.tex'.\n");
                    336:   fprintf (stderr, "  The OPTIONS are:\n");
                    337:
                    338:   for (i = 0; texindex_options[i].long_name; i++)
                    339:     {
                    340:       fprintf (stderr, "    %s %s",
                    341:               texindex_options[i].long_name,
                    342:               texindex_options[i].arg_name ?
                    343:               texindex_options[i].arg_name : "");
                    344:
                    345:       if (texindex_options[i].short_name)
                    346:        fprintf (stderr, " \n    or %s %s",
                    347:                 texindex_options[i].short_name,
                    348:                 texindex_options[i].arg_name ?
                    349:                 texindex_options[i].arg_name : "");
                    350:       fprintf (stderr, "\t%s\n", texindex_options[i].doc_string);
                    351:     }
                    352:   exit (result_value);
                    353: }
                    354:
                    355: /* Decode the command line arguments to set the parameter variables
                    356:    and set up the vector of keyfields and the vector of input files. */
                    357:
                    358: void
                    359: decode_command (argc, argv)
                    360:      int argc;
                    361:      char **argv;
                    362: {
                    363:   int arg_index = 1;
                    364:   char **ip;
                    365:   char **op;
                    366:
                    367:   /* Store default values into parameter variables. */
                    368:
                    369:   tempdir = getenv ("TMPDIR");
                    370: #ifdef VMS
                    371:   if (tempdir == NULL)
                    372:     tempdir = "sys$scratch:";
                    373: #else
                    374:   if (tempdir == NULL)
                    375:     tempdir = "/tmp/";
                    376:   else
                    377:     tempdir = concat (tempdir, "/", "");
                    378: #endif
                    379:
                    380:   keep_tempfiles = 0;
                    381:
                    382:   /* Allocate ARGC input files, which must be enough.  */
                    383:
                    384:   infiles = (char **) xmalloc (argc * sizeof (char *));
                    385:   outfiles = (char **) xmalloc (argc * sizeof (char *));
                    386:   ip = infiles;
                    387:   op = outfiles;
                    388:
                    389:   while (arg_index < argc)
                    390:     {
                    391:       char *arg = argv[arg_index++];
                    392:
                    393:       if (*arg == '-')
                    394:        {
                    395:          if (strcmp (arg, "--version") == 0)
                    396:            {
                    397:              fprintf (stderr, "%s", TEXINDEX_VERSION_STRING);
                    398: #ifdef TEXINDEX_MODIFICATION_STRING
                    399:              fprintf (stderr, " %s", TEXINDEX_MODIFICATION_STRING);
                    400: #endif
                    401:              fprintf (stderr, "\n");
                    402:              exit (0);
                    403:            }
                    404:          else if ((strcmp (arg, "--keep") == 0) ||
                    405:                   (strcmp (arg, "-k") == 0))
                    406:            {
                    407:              keep_tempfiles = 1;
                    408:            }
                    409:          else if ((strcmp (arg, "--help") == 0) ||
                    410:                   (strcmp (arg, "-h") == 0))
                    411:            {
                    412:              usage (0);
                    413:            }
                    414:          else if ((strcmp (arg, "--output") == 0) ||
                    415:                   (strcmp (arg, "-o") == 0))
                    416:            {
                    417:              if (argv[arg_index] != (char *)NULL)
                    418:                {
                    419:                  arg_index++;
                    420:                  if (op > outfiles)
                    421:                    *(op - 1) = argv[arg_index];
                    422:                }
                    423:              else
                    424:                usage (1);
                    425:            }
                    426:          else
                    427:            usage (1);
                    428:        }
                    429:       else
                    430:        {
                    431:          *ip++ = arg;
                    432:          *op++ = (char *)NULL;
                    433:        }
                    434:     }
                    435:
                    436:   /* Record number of keyfields and terminate list of filenames. */
                    437:   num_infiles = ip - infiles;
                    438:   *ip = (char *)NULL;
                    439:   if (num_infiles == 0)
                    440:     usage (1);
                    441: }
                    442: 
                    443: /* Return a name for a temporary file. */
                    444:
                    445: char *
                    446: maketempname (count)
                    447:      int count;
                    448: {
                    449:   char tempsuffix[10];
                    450:   sprintf (tempsuffix, "%d", count);
                    451:   return concat (tempdir, tempbase, tempsuffix);
                    452: }
                    453:
                    454: /* Delete all temporary files up to TO_COUNT. */
                    455:
                    456: void
                    457: flush_tempfiles (to_count)
                    458:      int to_count;
                    459: {
                    460:   if (keep_tempfiles)
                    461:     return;
                    462:   while (last_deleted_tempcount < to_count)
                    463:     unlink (maketempname (++last_deleted_tempcount));
                    464: }
                    465:
                    466: /* Copy the input file open on IDESC into a temporary file
                    467:    and return the temporary file name. */
                    468:
                    469: #define BUFSIZE 1024
                    470:
                    471: char *
                    472: tempcopy (idesc)
                    473:      int idesc;
                    474: {
                    475:   char *outfile = maketempname (++tempcount);
                    476:   int odesc;
                    477:   char buffer[BUFSIZE];
                    478:
                    479:   odesc = open (outfile, O_WRONLY | O_CREAT, 0666);
                    480:
                    481:   if (odesc < 0)
                    482:     pfatal_with_name (outfile);
                    483:
                    484:   while (1)
                    485:     {
                    486:       int nread = read (idesc, buffer, BUFSIZE);
                    487:       write (odesc, buffer, nread);
                    488:       if (!nread)
                    489:        break;
                    490:     }
                    491:
                    492:   close (odesc);
                    493:
                    494:   return outfile;
                    495: }
                    496: 
                    497: /* Compare LINE1 and LINE2 according to the specified set of keyfields. */
                    498:
                    499: int
                    500: compare_full (line1, line2)
                    501:      u_short **line1, **line2;
                    502: {
                    503:   int i;
                    504:
                    505:   /* Compare using the first keyfield;
                    506:      if that does not distinguish the lines, try the second keyfield;
                    507:      and so on. */
                    508:
                    509:   for (i = 0; i < num_keyfields; i++)
                    510:     {
                    511:       long length1, length2;
                    512:       u_short *start1 = find_field (&keyfields[i], *line1, &length1);
                    513:       u_short *start2 = find_field (&keyfields[i], *line2, &length2);
                    514:       int tem = compare_field (&keyfields[i], start1, length1, *line1 - text_base,
                    515:                               start2, length2, *line2 - text_base);
                    516:       if (tem)
                    517:        {
                    518:          if (keyfields[i].reverse)
                    519:            return -tem;
                    520:          return tem;
                    521:        }
                    522:     }
                    523:
                    524:   return 0;                    /* Lines match exactly. */
                    525: }
                    526:
                    527: /* Compare LINE1 and LINE2, described by structures
                    528:    in which the first keyfield is identified in advance.
                    529:    For positional sorting, assumes that the order of the lines in core
                    530:    reflects their nominal order.  */
                    531:
                    532: int
                    533: compare_prepared (line1, line2)
                    534:      struct lineinfo *line1, *line2;
                    535: {
                    536:   int i;
                    537:   int tem;
                    538:   u_short *text1, *text2;
                    539:
                    540:   /* Compare using the first keyfield, which has been found for us already. */
                    541:   if (keyfields->positional)
                    542:     {
                    543:       if (line1->text - text_base > line2->text - text_base)
                    544:        tem = 1;
                    545:       else
                    546:        tem = -1;
                    547:     }
                    548:   else if (keyfields->numeric)
                    549:     tem = line1->key.number - line2->key.number;
                    550:   else
                    551:     tem = compare_field (keyfields, line1->key.text, line1->keylen, 0,
                    552:                         line2->key.text, line2->keylen, 0);
                    553:   if (tem)
                    554:     {
                    555:       if (keyfields->reverse)
                    556:        return -tem;
                    557:       return tem;
                    558:     }
                    559:
                    560:   text1 = line1->text;
                    561:   text2 = line2->text;
                    562:
                    563:   /* Compare using the second keyfield;
                    564:      if that does not distinguish the lines, try the third keyfield;
                    565:      and so on. */
                    566:
                    567:   for (i = 1; i < num_keyfields; i++)
                    568:     {
                    569:       long length1, length2;
                    570:       u_short *start1 = find_field (&keyfields[i], text1, &length1);
                    571:       u_short *start2 = find_field (&keyfields[i], text2, &length2);
                    572:       int tem = compare_field (&keyfields[i], start1, length1, text1 - text_base,
                    573:                               start2, length2, text2 - text_base);
                    574:       if (tem)
                    575:        {
                    576:          if (keyfields[i].reverse)
                    577:            return -tem;
                    578:          return tem;
                    579:        }
                    580:     }
                    581:
                    582:   return 0;                    /* Lines match exactly. */
                    583: }
                    584:
                    585: /* Like compare_full but more general.
                    586:    You can pass any strings, and you can say how many keyfields to use.
                    587:    POS1 and POS2 should indicate the nominal positional ordering of
                    588:    the two lines in the input.  */
                    589:
                    590: int
                    591: compare_general (str1, str2, pos1, pos2, use_keyfields)
                    592:      u_short *str1, *str2;
                    593:      long pos1, pos2;
                    594:      int use_keyfields;
                    595: {
                    596:   int i;
                    597:
                    598:   /* Compare using the first keyfield;
                    599:      if that does not distinguish the lines, try the second keyfield;
                    600:      and so on. */
                    601:
                    602:   for (i = 0; i < use_keyfields; i++)
                    603:     {
                    604:       long length1, length2;
                    605:       u_short *start1 = find_field (&keyfields[i], str1, &length1);
                    606:       u_short *start2 = find_field (&keyfields[i], str2, &length2);
                    607:       int tem = compare_field (&keyfields[i], start1, length1, pos1,
                    608:                               start2, length2, pos2);
                    609:       if (tem)
                    610:        {
                    611:          if (keyfields[i].reverse)
                    612:            return -tem;
                    613:          return tem;
                    614:        }
                    615:     }
                    616:
                    617:   return 0;                    /* Lines match exactly. */
                    618: }
                    619:
                    620: /* Find the start and length of a field in STR according to KEYFIELD.
                    621:    A pointer to the starting character is returned, and the length
                    622:    is stored into the int that LENGTHPTR points to.  */
                    623:
                    624: u_short *
                    625: find_field (keyfield, str, lengthptr)
                    626:      struct keyfield *keyfield;
                    627:      u_short *str;
                    628:      long *lengthptr;
                    629: {
                    630:   u_short *start;
                    631:   u_short *end;
                    632:   u_short *(*fun) ();
                    633:
                    634:   if (keyfield->braced)
                    635:     fun = find_braced_pos;
                    636:   else
                    637:     fun = find_pos;
                    638:
                    639:   start = (*fun) (str, keyfield->startwords, keyfield->startchars,
                    640:                  keyfield->ignore_blanks);
                    641:   if (keyfield->endwords < 0)
                    642:     {
                    643:       if (keyfield->braced)
                    644:        end = find_braced_end (start);
                    645:       else
                    646:        {
                    647:          end = start;
                    648:          while (*end && *end != '\n')
                    649:            end++;
                    650:        }
                    651:     }
                    652:   else
                    653:     {
                    654:       end = (*fun) (str, keyfield->endwords, keyfield->endchars, 0);
                    655:       if (end - str < start - str)
                    656:        end = start;
                    657:     }
                    658:   *lengthptr = end - start;
                    659:   return start;
                    660: }
                    661:
                    662: /* Return a pointer to a specified place within STR,
                    663:    skipping (from the beginning) WORDS words and then CHARS chars.
                    664:    If IGNORE_BLANKS is nonzero, we skip all blanks
                    665:    after finding the specified word.  */
                    666:
                    667: u_short *
                    668: find_pos (str, words, chars, ignore_blanks)
                    669:      u_short *str;
                    670:      int words, chars;
                    671:      int ignore_blanks;
                    672: {
                    673:   int i;
                    674:   u_short *p = str;
                    675:
                    676:   for (i = 0; i < words; i++)
                    677:     {
                    678:       u_short c;
                    679:       /* Find next bunch of nonblanks and skip them. */
                    680:       while ((c = *p) == ' ' || c == '\t')
                    681:        p++;
                    682:       while ((c = *p) && c != '\n' && !(c == ' ' || c == '\t'))
                    683:        p++;
                    684:       if (!*p || *p == '\n')
                    685:        return p;
                    686:     }
                    687:
                    688:   while (*p == ' ' || *p == '\t')
                    689:     p++;
                    690:
                    691:   for (i = 0; i < chars; i++)
                    692:     {
                    693:       if (!*p || *p == '\n')
                    694:        break;
                    695:       p++;
                    696:     }
                    697:   return p;
                    698: }
                    699:
                    700: /* Like find_pos but assumes that each field is surrounded by braces
                    701:    and that braces within fields are balanced. */
                    702:
                    703: u_short *
                    704: find_braced_pos (str, words, chars, ignore_blanks)
                    705:      u_short *str;
                    706:      int words, chars;
                    707:      int ignore_blanks;
                    708: {
                    709:   int i;
                    710:   int bracelevel;
                    711:   u_short *p = str;
                    712:   u_short c;
                    713:
                    714:   for (i = 0; i < words; i++)
                    715:     {
                    716:       bracelevel = 1;
                    717:       while ((c = *p++) != '{' && c != '\n' && c)
                    718:        /* Do nothing. */ ;
                    719:       if (c != '{')
                    720:        return p - 1;
                    721:       while (bracelevel)
                    722:        {
                    723:          c = *p++;
                    724:          if (c == '{')
                    725:            bracelevel++;
                    726:          if (c == '}')
                    727:            bracelevel--;
                    728:          if (c == 0 || c == '\n')
                    729:            return p - 1;
                    730:        }
                    731:     }
                    732:
                    733:   while ((c = *p++) != '{' && c != '\n' && c)
                    734:     /* Do nothing. */ ;
                    735:
                    736:   if (c != '{')
                    737:     return p - 1;
                    738:
                    739:   if (ignore_blanks)
                    740:     while ((c = *p) == ' ' || c == '\t')
                    741:       p++;
                    742:
                    743:   for (i = 0; i < chars; i++)
                    744:     {
                    745:       if (!*p || *p == '\n')
                    746:        break;
                    747:       p++;
                    748:     }
                    749:   return p;
                    750: }
                    751:
                    752: /* Find the end of the balanced-brace field which starts at STR.
                    753:    The position returned is just before the closing brace. */
                    754:
                    755: u_short *
                    756: find_braced_end (str)
                    757:      u_short *str;
                    758: {
                    759:   int bracelevel;
                    760:   u_short *p = str;
                    761:   u_short c;
                    762:
                    763:   bracelevel = 1;
                    764:   while (bracelevel)
                    765:     {
                    766:       c = *p++;
                    767:       if (c == '{')
                    768:        bracelevel++;
                    769:       if (c == '}')
                    770:        bracelevel--;
                    771:       if (c == 0 || c == '\n')
                    772:        return p - 1;
                    773:     }
                    774:   return p - 1;
                    775: }
                    776:
                    777: long
                    778: find_value (start, length)
                    779:     u_short *start;
                    780:     long length;
                    781:     {
                    782:     long       val;
                    783:
                    784:     while (length != 0L)
                    785:        {
                    786:        if (isdigit (*start))
                    787:            break;
                    788:        length--;
                    789:        start++;
                    790:        }
                    791:
                    792:     val = 0;
                    793:     while(isdigit(*start) && length > 0L)
                    794:        {
                    795:        val = val*10 + *start;
                    796:        start++;
                    797:        length--;
                    798:        }
                    799:
                    800:     return val;
                    801:     }
                    802:
                    803: /* Vector used to translate characters for comparison.
                    804:    This is how we make all alphanumerics follow all else,
                    805:    and ignore case in the first sorting.  */
                    806: int char_order[0x8000];
                    807:
                    808: void
                    809: init_char_order (void)
                    810:     {
                    811:     int i;
                    812:
                    813:     for (i = 1; i < 0x8000; i++)
                    814:        char_order[i] = i;
                    815:
                    816:     for (i = '0'; i <= '9'; i++)
                    817:        char_order[i] += 512;
                    818:
                    819:     for (i = 'a'; i <= 'z'; i++)
                    820:        {
                    821:        char_order[i] = 512 + i;
                    822:        char_order[i + 'A' - 'a'] = 512 + i;
                    823:        }
                    824:
                    825:     /* 以下のシンボルは無視 (i.e. char_order[XXX] = 0) */
                    826:     for (i = 0x2121; i <= 0x2132; i++) /* 全角空白 〜 _ */
                    827:         char_order[i] = 0;
                    828:     for (i = 0x213D; i <= 0x215B; i++) /* − 〜 】 */
                    829:         char_order[i] = 0;
                    830:
                    831:     /* 促音、拗音、濁音、半濁音、片仮名/平仮名の処理   */
                    832:     /*     1.  平仮名と片仮名、同じに扱う  */
                    833:     /*         ぁ/ァ(2421/2521)〜ん/ン(2473/2573)  */
                    834:     for (i = 0x2421; i < 0x2474; i++)
                    835:        char_order[i + 0x100] = i;
                    836:
                    837:     /*     2.  促音、拗音、濁音、半濁音、類    */
                    838:     /* ぁ〜ぉ →       あ〜お  */
                    839:     for (i = 0x2421; i < 0x242B; i += 2)
                    840:        char_order[i] = char_order[i + 0x100]    = i+1;
                    841:     /* が〜ぢ →       か〜ち  */
                    842:     for (i = 0x242C; i < 0x2443; i += 2)
                    843:        char_order[i] = char_order[i + 0x100]    = i-1;
                    844:     /* っ */
                    845:     char_order[0x2443] = char_order[0x2543]    = 0x2443+1;
                    846:     /* づ〜ど →       つ〜と */
                    847:     for (i = 0x2445; i < 0x244A; i += 2)
                    848:        char_order[i] = char_order[i + 0x100]    = i-1;
                    849:     /* ばぱ〜ぼぽ      →  は〜ほ  */
                    850:     for (i = 0x2450; i < 0x245E; i += 3)
                    851:        {
                    852:        char_order[i]   = char_order[i + 0x100]     = i-1;
                    853:        char_order[i+1] = char_order[i+1 + 0x100]   = i-1;
                    854:        }
                    855:     /* ゃ〜ょ →       や〜よ  */
                    856:     for (i = 0x2463; i < 0x2469; i += 2)
                    857:        char_order[i] = char_order[i + 0x100] = i+1;
                    858:     /* ゎ      →  わ  */
                    859:     char_order[0x246E] = char_order[0x256E]    = 0x246E +1;
                    860:
                    861:     /*     3.  特殊片仮名の扱い    */
                    862:     char_order[0x2574] = 0x2426;       /* ヴ   →  う */
                    863:     char_order[0x2575] = 0x242B;       /* ヵ   →  か */
                    864:     char_order[0x2576] = 0x2431;       /* ヶ   →  け */
                    865:
                    866:     /* JISX0208のアルファベット、数字  */
                    867:     for (i = 0x2330; i <= 0x2339; i++) /* 0〜9 -> 0-9 */
                    868:         char_order[i] = i - 0x2330 + 512;
                    869:     for (i = 0x2341; i <= 0x235A; i++) /* A〜Z、a〜z -> A-Z */
                    870:         char_order[i] = char_order[i + (0x2361 - 0x2341)]
                    871:          = 512 + (i -  0x2341) + 'a';
                    872:
                    873:     /* 長音処理は比較ルーチンで行なう */
                    874:
                    875:     init_Vowel_Table();
                    876:     }
                    877:
                    878: /* Compare two fields (each specified as a start pointer and a character count)
                    879:    according to KEYFIELD.
                    880:    The sign of the value reports the relation between the fields. */
                    881:
                    882: int
                    883: compare_field (keyfield, start1, length1, pos1, start2, length2, pos2)
                    884:      struct keyfield *keyfield;
                    885:      u_short *start1;
                    886:      long length1;
                    887:      long pos1;
                    888:      u_short *start2;
                    889:      long length2;
                    890:      long pos2;
                    891: {
                    892:   if (keyfields->positional)
                    893:     {
                    894:       if (pos1 > pos2)
                    895:        return 1;
                    896:       else
                    897:        return -1;
                    898:     }
                    899:   if (keyfield->numeric)
                    900:     {
                    901:       long value = find_value (start1, length1) - find_value (start2, length2);
                    902:       if (value > 0)
                    903:        return 1;
                    904:       if (value < 0)
                    905:        return -1;
                    906:       return 0;
                    907:     }
                    908:   else
                    909:     {
                    910:     /* lexical ってか? */
                    911:       u_short *p1 = start1;
                    912:       u_short *p2 = start2;
                    913:       u_short *e1 = start1 + length1;
                    914:       u_short *e2 = start2 + length2;
                    915:       u_short  lastchar1 = 0, lastchar2 = 0;
                    916:       int   c1, c2;
                    917:
                    918:       do
                    919:          {
                    920:          int   co1, co2;
                    921:
                    922:          /* char_order[X] が 0 のもの(シンボル)は無視する */
                    923:          c1 = c2 = 0;
                    924:          while (p1 < e1)
                    925:            {
                    926:              if (char_order[c1 = *p1++]) break;
                    927:              c1 = 0;
                    928:            }
                    929:          while (p2 < e2)
                    930:            {
                    931:              if (char_order[c2 = *p2++]) break;
                    932:              c2 = 0;
                    933:            }
                    934:
                    935:          /* 長音処理 */
                    936:          if (c1 == 0x213C)                     /* ー */
                    937:              c1    = Vowel_of(lastchar1);
                    938:          if (c2 == 0x213C)                     /* ー */
                    939:              c2    = Vowel_of(lastchar2);
                    940:          co1   = char_order[c1];
                    941:          co2   = char_order[c2];
                    942:
                    943:          if (co1 != co2)
                    944:              return co1 - co2;
                    945:          else
                    946:              lastchar1 = c1,   lastchar2 = c2;
                    947:
                    948:          } while(c1);
                    949:
                    950:       /* Strings are equal except possibly for case.  */
                    951:       p1 = start1;
                    952:       p2 = start2;
                    953:       while (1)
                    954:        {
                    955:          int c1, c2;
                    956:
                    957:          if (p1 == e1)
                    958:            c1 = 0;
                    959:          else
                    960:            c1 = *p1++;
                    961:          if (p2 == e2)
                    962:            c2 = 0;
                    963:          else
                    964:            c2 = *p2++;
                    965:
                    966:          if (c1 != c2)
                    967:            /* Reverse sign here so upper case comes out last.  */
                    968:            return c2 - c1;
                    969:          if (!c1)
                    970:            break;
                    971:        }
                    972:
                    973:       return 0;
                    974:     }
                    975: }
                    976: 
                    977: /* A `struct linebuffer' is a structure which holds a line of text.
                    978:    `readline' reads a line from a stream into a linebuffer
                    979:    and works regardless of the length of the line.  */
                    980:
                    981: struct linebuffer
                    982: {
                    983:   long size;
                    984:   u_short *buffer;
                    985: };
                    986:
                    987: /* Initialize LINEBUFFER for use. */
                    988:
                    989: void
                    990: initbuffer (linebuffer)
                    991:      struct linebuffer *linebuffer;
                    992: {
                    993:   linebuffer->size = 200;
                    994:   linebuffer->buffer = (u_short *) xmalloc (200*sizeof(u_short));
                    995: }
                    996:
                    997: /* Read a line of text from STREAM into LINEBUFFER.
                    998:    Return the length of the line.  */
                    999:
                   1000: long
                   1001: readline (linebuffer, stream)
                   1002:      struct linebuffer *linebuffer;
                   1003:      FILE *stream;
                   1004: {
                   1005:   u_short *buffer = linebuffer->buffer;
                   1006:   u_short *p = linebuffer->buffer;
                   1007:   u_short *end = p + linebuffer->size;
                   1008:
                   1009:   while (1)
                   1010:     {
                   1011:       int c = getc (stream);
                   1012:       if (p == end)
                   1013:        {
                   1014:          buffer =
                   1015:              (u_short *) xrealloc (buffer,
                   1016:                                    (linebuffer->size *= 2)*sizeof(u_short));
                   1017:          p += buffer - linebuffer->buffer;
                   1018:          end += buffer - linebuffer->buffer;
                   1019:          linebuffer->buffer = buffer;
                   1020:        }
                   1021:       if (c < 0 || c == '\n')
                   1022:        {
                   1023:          *p = 0;
                   1024:          break;
                   1025:        }
                   1026:       *p++ = c;
                   1027:     }
                   1028:
                   1029:   return p - buffer;
                   1030: }
                   1031: 
                   1032: /* Sort an input file too big to sort in core.  */
                   1033:
                   1034: void
                   1035: sort_offline (infile, nfiles, total, outfile)
                   1036:      char *infile;
                   1037:      int nfiles;
                   1038:      long total;
                   1039:      char *outfile;
                   1040: {
                   1041:   /* More than enough. */
                   1042:   int ntemps = 2 * (total + MAX_IN_CORE_SORT - 1) / MAX_IN_CORE_SORT;
                   1043:   char **tempfiles = (char **) xmalloc (ntemps * sizeof (char *));
                   1044:   FILE *istream = fopen (infile, "r");
                   1045:   int i;
                   1046:   struct linebuffer lb;
                   1047:   long linelength;
                   1048:   int failure = 0;
                   1049:
                   1050:   initbuffer (&lb);
                   1051:
                   1052:   /* Read in one line of input data.  */
                   1053:
                   1054:   linelength = readline (&lb, istream);
                   1055:
                   1056:   if (lb.buffer[0] != '\\' && lb.buffer[0] != '@')
                   1057:     {
                   1058:       error ("%s: not a texinfo index file", infile);
                   1059:       return;
                   1060:     }
                   1061:
                   1062:   /* Split up the input into `ntemps' temporary files, or maybe fewer,
                   1063:      and put the new files' names into `tempfiles' */
                   1064:
                   1065:   for (i = 0; i < ntemps; i++)
                   1066:     {
                   1067:       char *outname = maketempname (++tempcount);
                   1068:       FILE *ostream = fopen (outname, "w");
                   1069:       long tempsize = 0;
                   1070:
                   1071:       if (!ostream)
                   1072:        pfatal_with_name (outname);
                   1073:       tempfiles[i] = outname;
                   1074:
                   1075:       /* Copy lines into this temp file as long as it does not make file
                   1076:         "too big" or until there are no more lines.  */
                   1077:
                   1078:       while (tempsize + linelength + 1 <= MAX_IN_CORE_SORT)
                   1079:        {
                   1080:          tempsize += linelength + 1;
                   1081:              {
                   1082:              u_short   *tp;
                   1083:
                   1084:              for (tp = lb.buffer; *tp; tp++)
                   1085:                  putc(*tp, ostream);
                   1086:              }
                   1087:          putc ('\n', ostream);
                   1088:
                   1089:          /* Read another line of input data.  */
                   1090:
                   1091:          linelength = readline (&lb, istream);
                   1092:          if (!linelength && feof (istream))
                   1093:            break;
                   1094:
                   1095:          if (lb.buffer[0] != '\\' && lb.buffer[0] != '@')
                   1096:            {
                   1097:              error ("%s: not a texinfo index file", infile);
                   1098:              failure = 1;
                   1099:              goto fail;
                   1100:            }
                   1101:        }
                   1102:       fclose (ostream);
                   1103:       if (feof (istream))
                   1104:        break;
                   1105:     }
                   1106:
                   1107:   free (lb.buffer);
                   1108:
                   1109: fail:
                   1110:   /* Record number of temp files we actually needed.  */
                   1111:
                   1112:   ntemps = i;
                   1113:
                   1114:   /* Sort each tempfile into another tempfile.
                   1115:     Delete the first set of tempfiles and put the names of the second
                   1116:     into `tempfiles'. */
                   1117:
                   1118:   for (i = 0; i < ntemps; i++)
                   1119:     {
                   1120:       char *newtemp = maketempname (++tempcount);
                   1121:       sort_in_core (&tempfiles[i], MAX_IN_CORE_SORT, newtemp);
                   1122:       if (!keep_tempfiles)
                   1123:        unlink (tempfiles[i]);
                   1124:       tempfiles[i] = newtemp;
                   1125:     }
                   1126:
                   1127:   if (failure)
                   1128:     return;
                   1129:
                   1130:   /* Merge the tempfiles together and indexify. */
                   1131:
                   1132:   merge_files (tempfiles, ntemps, outfile);
                   1133: }
                   1134: 
                   1135: /* Sort INFILE, whose size is TOTAL,
                   1136:    assuming that is small enough to be done in-core,
                   1137:    then indexify it and send the output to OUTFILE (or to stdout).  */
                   1138:
                   1139: void
                   1140: sort_in_core (infile, total, outfile)
                   1141:      char *infile;
                   1142:      long total;
                   1143:      char *outfile;
                   1144: {
                   1145:   u_short **nextline;
                   1146:   u_short *data = (u_short *) xmalloc ((total + 1)*sizeof(u_short));
                   1147:   u_short *file_data;
                   1148:   long file_size;
                   1149:   int i;
                   1150:   FILE *ostream = stdout;
                   1151:   struct lineinfo *lineinfo;
                   1152:   FILE *ifp;
                   1153:   int  c;
                   1154:
                   1155:   /* Read the contents of the file into the moby array `data'. */
                   1156:   ifp  = fopen(infile, "r");
                   1157:   if (!ifp)
                   1158:     fatal ("failure reopening %s", infile);
                   1159:   memset(data, 0, (total + 1)*sizeof(u_short));        /* zero clear */
                   1160:   for (i = 0, c = fgetc(ifp); c != EOF; c = fgetc(ifp), i++)
                   1161:       {
                   1162:       if (EUC_BYTE(c))
                   1163:          {
                   1164:          data[i] = (c & 0x007F) << 8;
                   1165:          c = fgetc(ifp) & 0x007F;
                   1166:          }
                   1167:       data[i]  += c;
                   1168:       }
                   1169:   file_size = i;
                   1170:   file_data = data;
                   1171:   data[file_size] = 0;
                   1172:
                   1173:   fclose(ifp);
                   1174:
                   1175:   if (file_size > 0 && data[0] != '\\' && data[0] != '@')
                   1176:     {
                   1177:       error ("%s: not a texinfo index file", infile);
                   1178:       return;
                   1179:     }
                   1180:
                   1181:   init_char_order ();
                   1182:
                   1183:   /* Sort routines want to know this address. */
                   1184:
                   1185:   text_base = data;
                   1186:
                   1187:   /* Create the array of pointers to lines, with a default size
                   1188:      frequently enough.  */
                   1189:
                   1190:   nlines = total / 50;
                   1191:   if (!nlines)
                   1192:     nlines = 2;
                   1193:   linearray = (u_short **) xmalloc (nlines * sizeof (u_short *));
                   1194:
                   1195:   /* `nextline' points to the next free slot in this array.
                   1196:      `nlines' is the allocated size.  */
                   1197:
                   1198:   nextline = linearray;
                   1199:
                   1200:   /* Parse the input file's data, and make entries for the lines.  */
                   1201:
                   1202:   nextline = parsefile (nextline, file_data, file_size);
                   1203:
                   1204:   if (nextline == 0)
                   1205:     {
                   1206:       error ("%s: not a texinfo index file", infile);
                   1207:       return;
                   1208:     }
                   1209:
                   1210:   /* Sort the lines. */
                   1211:
                   1212:   /* If we have enough space, find the first keyfield of each line in advance.
                   1213:      Make a `struct lineinfo' for each line, which records the keyfield
                   1214:      as well as the line, and sort them.  */
                   1215:
                   1216:   lineinfo = (struct lineinfo *) malloc ((nextline - linearray) * sizeof (struct lineinfo));
                   1217:
                   1218:   if (lineinfo)
                   1219:     {
                   1220:       struct lineinfo *lp;
                   1221:       u_short **p;
                   1222:
                   1223:       for (lp = lineinfo, p = linearray; p != nextline; lp++, p++)
                   1224:        {
                   1225:          lp->text = *p;
                   1226:          lp->key.text = find_field (keyfields, *p, &lp->keylen);
                   1227:          if (keyfields->numeric)
                   1228:            lp->key.number = find_value (lp->key.text, lp->keylen);
                   1229:        }
                   1230:
                   1231:       qsort (lineinfo, nextline - linearray, sizeof (struct lineinfo), compare_prepared);
                   1232:
                   1233:       for (lp = lineinfo, p = linearray; p != nextline; lp++, p++)
                   1234:          *p = lp->text;
                   1235:
                   1236:       free (lineinfo);
                   1237:     }
                   1238:   else
                   1239:     qsort (linearray, nextline - linearray, sizeof (u_short *), compare_full);
                   1240:
                   1241:   /* Open the output file. */
                   1242:
                   1243:   if (outfile)
                   1244:     {
                   1245:       ostream = fopen (outfile, "w");
                   1246:       if (!ostream)
                   1247:        pfatal_with_name (outfile);
                   1248:     }
                   1249:
                   1250:   writelines (linearray, nextline - linearray, ostream);
                   1251:   if (outfile)
                   1252:     fclose (ostream);
                   1253:
                   1254:   free (linearray);
                   1255:   free (data);
                   1256: }
                   1257: 
                   1258: /* Parse an input string in core into lines.
                   1259:    DATA is the input string, and SIZE is its length.
                   1260:    Data goes in LINEARRAY starting at NEXTLINE.
                   1261:    The value returned is the first entry in LINEARRAY still unused.
                   1262:    Value 0 means input file contents are invalid.  */
                   1263:
                   1264: u_short **
                   1265: parsefile (nextline, data, size)
                   1266:      u_short **nextline;
                   1267:      u_short *data;
                   1268:      long size;
                   1269: {
                   1270:   u_short *p, *end;
                   1271:   u_short **line = nextline;
                   1272:
                   1273:   p = data;
                   1274:   end = p + size;
                   1275:   *end = 0;
                   1276:
                   1277:   while (p != end)
                   1278:     {
                   1279:       if (p[0] != '\\' && p[0] != '@')
                   1280:        return 0;
                   1281:
                   1282:       *line = p;
                   1283:       while (*p && *p != '\n')
                   1284:        p++;
                   1285:       if (p != end)
                   1286:        p++;
                   1287:
                   1288:       line++;
                   1289:       if (line == linearray + nlines)
                   1290:        {
                   1291:          u_short **old = linearray;
                   1292:          linearray =
                   1293:              (u_short **) xrealloc (linearray,
                   1294:                                     sizeof (u_short *) * (nlines *= 4));
                   1295:          line += linearray - old;
                   1296:        }
                   1297:     }
                   1298:
                   1299:   return line;
                   1300: }
                   1301: 
                   1302: /* Indexification is a filter applied to the sorted lines
                   1303:    as they are being written to the output file.
                   1304:    Multiple entries for the same name, with different page numbers,
                   1305:    get combined into a single entry with multiple page numbers.
                   1306:    The first braced field, which is used for sorting, is discarded.
                   1307:    However, its first character is examined, folded to lower case,
                   1308:    and if it is different from that in the previous line fed to us
                   1309:    a \initial line is written with one argument, the new initial.
                   1310:
                   1311:    If an entry has four braced fields, then the second and third
                   1312:    constitute primary and secondary names.
                   1313:    In this case, each change of primary name
                   1314:    generates a \primary line which contains only the primary name,
                   1315:    and in between these are \secondary lines which contain
                   1316:    just a secondary name and page numbers. */
                   1317:
                   1318: /* The last primary name we wrote a \primary entry for.
                   1319:    If only one level of indexing is being done, this is the last name seen. */
                   1320: u_short *lastprimary;
                   1321: /* Length of storage allocated for lastprimary. */
                   1322: int lastprimarylength;
                   1323:
                   1324: /* Similar, for the secondary name. */
                   1325: u_short *lastsecondary;
                   1326: int lastsecondarylength;
                   1327:
                   1328: /* Zero if we are not in the middle of writing an entry.
                   1329:    One if we have written the beginning of an entry but have not
                   1330:    yet written any page numbers into it.
                   1331:    Greater than one if we have written the beginning of an entry
                   1332:    plus at least one page number. */
                   1333: int pending;
                   1334:
                   1335: /* The initial (for sorting purposes) of the last primary entry written.
                   1336:    When this changes, a \initial {c} line is written */
                   1337:
                   1338: u_short *lastinitial;
                   1339:
                   1340: int lastinitiallength;
                   1341:
                   1342: /* When we need a string of length 1 for the value of lastinitial,
                   1343:    store it here.  */
                   1344:
                   1345: u_short lastinitial1[2];
                   1346:
                   1347: /* Initialize static storage for writing an index. */
                   1348:
                   1349: void
                   1350: init_index ()
                   1351: {
                   1352:   pending = 0;
                   1353:   lastinitial = lastinitial1;
                   1354:   lastinitial1[0] = 0;
                   1355:   lastinitial1[1] = 0;
                   1356:   lastinitiallength = 0;
                   1357:   lastprimarylength = 100;
                   1358:   lastprimary = (u_short *) xmalloc ((lastprimarylength + 1)*sizeof(u_short));
                   1359:   memset (lastprimary, '\0', (lastprimarylength + 1)*sizeof(u_short));
                   1360:   lastsecondarylength = 100;
                   1361:   lastsecondary =
                   1362:       (u_short *) xmalloc ((lastsecondarylength + 1)*sizeof(u_short));
                   1363:   memset (lastsecondary, '\0', (lastsecondarylength + 1)*sizeof(u_short));
                   1364: }
                   1365:
                   1366: /* Indexify.  Merge entries for the same name,
                   1367:    insert headers for each initial character, etc.  */
                   1368:
                   1369: void
                   1370: indexify (u_short *line, FILE *ostream)
                   1371: {
                   1372:   u_short *primary, *secondary, *pagenumber;
                   1373:   int primarylength, secondarylength = 0, pagelength;
                   1374:   int nosecondary;
                   1375:   int initiallength;
                   1376:   u_short *initial;
                   1377:   u_short initial1[2];
                   1378:   register u_short *p;
                   1379:
                   1380:   /* First, analyze the parts of the entry fed to us this time. */
                   1381:
                   1382:   p = find_braced_pos (line, 0, 0, 0);
                   1383:   if (*p == '{')
                   1384:     {
                   1385:       initial = p;
                   1386:       /* Get length of inner pair of braces starting at `p',
                   1387:         including that inner pair of braces.  */
                   1388:       initiallength = find_braced_end (p + 1) + 1 - p;
                   1389:     }
                   1390:   else
                   1391:     {
                   1392:       initial = initial1;
                   1393:       initial1[0] = *p & 0x7F7F;
                   1394:       initial1[1] = 0;
                   1395:       initiallength = 1;
                   1396:
                   1397:       if (initial1[0] >= 'a' && initial1[0] <= 'z')
                   1398:        initial1[0] -= ' ';
                   1399:       else if (initial1[0] > 0x2000)
                   1400:        initial1[0] = char_order[initial1[0]];
                   1401:     }
                   1402:
                   1403:   pagenumber = find_braced_pos (line, 1, 0, 0);
                   1404:   pagelength = find_braced_end (pagenumber) - pagenumber;
                   1405:   if (pagelength == 0)
                   1406:     abort ();
                   1407:
                   1408:   primary = find_braced_pos (line, 2, 0, 0);
                   1409:   primarylength = find_braced_end (primary) - primary;
                   1410:
                   1411:   secondary = find_braced_pos (line, 3, 0, 0);
                   1412:   nosecondary = !*secondary;
                   1413:   if (!nosecondary)
                   1414:     secondarylength = find_braced_end (secondary) - secondary;
                   1415:
                   1416:   /* If the primary is different from before, make a new primary entry. */
                   1417:   if (wstrncmp (primary, lastprimary, primarylength))
                   1418:     {
                   1419:       /* Close off current secondary entry first, if one is open. */
                   1420:       if (pending)
                   1421:        {
                   1422:          fputs ("}\n", ostream);
                   1423:          pending = 0;
                   1424:        }
                   1425:
                   1426:       /* If this primary has a different initial, include an entry for
                   1427:         the initial. */
                   1428:       if (initiallength != lastinitiallength ||
                   1429:          (*initial && wstrncmp (initial, lastinitial, initiallength)))
                   1430:        {
                   1431:          fprintf (ostream, "\\initial {");
                   1432:          wfwrite (initial, initiallength, 1, ostream);
                   1433:          fprintf (ostream, "}\n", initial);
                   1434:          if (initial == initial1)
                   1435:            {
                   1436:              lastinitial = lastinitial1;
                   1437:              *lastinitial1 = *initial1;
                   1438:            }
                   1439:          else
                   1440:            {
                   1441:              lastinitial = initial;
                   1442:            }
                   1443:          lastinitiallength = initiallength;
                   1444:        }
                   1445:
                   1446:       /* Make the entry for the primary.  */
                   1447:       if (nosecondary)
                   1448:        fputs ("\\entry {", ostream);
                   1449:       else
                   1450:        fputs ("\\primary {", ostream);
                   1451:       wfwrite (primary, primarylength, 1, ostream);
                   1452:       if (nosecondary)
                   1453:        {
                   1454:          fputs ("}{", ostream);
                   1455:          pending = 1;
                   1456:        }
                   1457:       else
                   1458:        fputs ("}\n", ostream);
                   1459:
                   1460:       /* Record name of most recent primary. */
                   1461:       if (lastprimarylength < primarylength)
                   1462:        {
                   1463:          lastprimarylength = primarylength + 100;
                   1464:          lastprimary =
                   1465:              (u_short *) xrealloc (lastprimary,
                   1466:                                    (1 + lastprimarylength)*sizeof(u_short));
                   1467:        }
                   1468:       memcpy (lastprimary, primary, primarylength*sizeof(u_short));
                   1469:       lastprimary[primarylength] = 0;
                   1470:
                   1471:       /* There is no current secondary within this primary, now. */
                   1472:       lastsecondary[0] = 0;
                   1473:     }
                   1474:
                   1475:   /* Should not have an entry with no subtopic following one with a subtopic. */
                   1476:
                   1477:   if (nosecondary && *lastsecondary)
                   1478:       {
                   1479:       char  tline[2001];
                   1480:       int   len;
                   1481:
                   1482:       len   = wtoc(tline, line, 2000);         /* 2000 is much enough ? */
                   1483:       tline[len]   = '\0';
                   1484:       error ("entry %s follows an entry with a secondary name", tline);
                   1485:       }
                   1486:
                   1487:   /* Start a new secondary entry if necessary. */
                   1488:   if (!nosecondary && wstrncmp (secondary, lastsecondary, secondarylength))
                   1489:     {
                   1490:       if (pending)
                   1491:        {
                   1492:          fputs ("}\n", ostream);
                   1493:          pending = 0;
                   1494:        }
                   1495:
                   1496:       /* Write the entry for the secondary.  */
                   1497:       fputs ("\\secondary {", ostream);
                   1498:       wfwrite (secondary, secondarylength, 1, ostream);
                   1499:       fputs ("}{", ostream);
                   1500:       pending = 1;
                   1501:
                   1502:       /* Record name of most recent secondary. */
                   1503:       if (lastsecondarylength < secondarylength)
                   1504:        {
                   1505:          lastsecondarylength = secondarylength + 100;
                   1506:          lastsecondary =
                   1507:              (u_short *) xrealloc ((lastsecondary,
                   1508:                                     1 + lastsecondarylength)*sizeof(u_short));
                   1509:        }
                   1510:       memcpy (lastsecondary, secondary, secondarylength * sizeof(u_short));
                   1511:       lastsecondary[secondarylength] = 0;
                   1512:     }
                   1513:
                   1514:   /* Here to add one more page number to the current entry. */
                   1515:   if (pending++ != 1)
                   1516:     fputs (", ", ostream);     /* Punctuate first, if this is not the first. */
                   1517:   wfwrite (pagenumber, pagelength, 1, ostream);
                   1518: }
                   1519:
                   1520: /* Close out any unfinished output entry. */
                   1521:
                   1522: void
                   1523: finish_index (ostream)
                   1524:      FILE *ostream;
                   1525: {
                   1526:   if (pending)
                   1527:     fputs ("}\n", ostream);
                   1528:   free (lastprimary);
                   1529:   free (lastsecondary);
                   1530: }
                   1531: 
                   1532: /* Copy the lines in the sorted order.
                   1533:    Each line is copied out of the input file it was found in. */
                   1534:
                   1535: void
                   1536: writelines (linearray, nlines, ostream)
                   1537:      u_short **linearray;
                   1538:      int nlines;
                   1539:      FILE *ostream;
                   1540: {
                   1541:   u_short **stop_line = linearray + nlines;
                   1542:   u_short **next_line;
                   1543:
                   1544:   init_index ();
                   1545:
                   1546:   /* Output the text of the lines, and free the buffer space. */
                   1547:
                   1548:   for (next_line = linearray; next_line != stop_line; next_line++)
                   1549:     {
                   1550:       /* If -u was specified, output the line only if distinct from previous one.  */
                   1551:       if (next_line == linearray
                   1552:       /* Compare previous line with this one, using only the
                   1553:          explicitly specd keyfields. */
                   1554:          || compare_general (*(next_line - 1), *next_line, 0L, 0L, num_keyfields - 1))
                   1555:        {
                   1556:          u_short *p = *next_line;
                   1557:          u_short c;
                   1558:
                   1559:          while ((c = *p++) && c != '\n')
                   1560:            /* Do nothing. */ ;
                   1561:          *(p - 1) = 0;
                   1562:          indexify (*next_line, ostream);
                   1563:        }
                   1564:     }
                   1565:
                   1566:   finish_index (ostream);
                   1567: }
                   1568: 
                   1569: /* Assume (and optionally verify) that each input file is sorted;
                   1570:    merge them and output the result.
                   1571:    Returns nonzero if any input file fails to be sorted.
                   1572:
                   1573:    This is the high-level interface that can handle an unlimited
                   1574:    number of files.  */
                   1575:
                   1576: #define MAX_DIRECT_MERGE 10
                   1577:
                   1578: int
                   1579: merge_files (infiles, nfiles, outfile)
                   1580:      char **infiles;
                   1581:      int nfiles;
                   1582:      char *outfile;
                   1583: {
                   1584:   char **tempfiles;
                   1585:   int ntemps;
                   1586:   int i;
                   1587:   int value = 0;
                   1588:   int start_tempcount = tempcount;
                   1589:
                   1590:   if (nfiles <= MAX_DIRECT_MERGE)
                   1591:     return merge_direct (infiles, nfiles, outfile);
                   1592:
                   1593:   /* Merge groups of MAX_DIRECT_MERGE input files at a time,
                   1594:      making a temporary file to hold each group's result.  */
                   1595:
                   1596:   ntemps = (nfiles + MAX_DIRECT_MERGE - 1) / MAX_DIRECT_MERGE;
                   1597:   tempfiles = (char **) xmalloc (ntemps * sizeof (char *));
                   1598:   for (i = 0; i < ntemps; i++)
                   1599:     {
                   1600:       int nf = MAX_DIRECT_MERGE;
                   1601:       if (i + 1 == ntemps)
                   1602:        nf = nfiles - i * MAX_DIRECT_MERGE;
                   1603:       tempfiles[i] = maketempname (++tempcount);
                   1604:       value |= merge_direct (&infiles[i * MAX_DIRECT_MERGE], nf, tempfiles[i]);
                   1605:     }
                   1606:
                   1607:   /* All temporary files that existed before are no longer needed
                   1608:      since their contents have been merged into our new tempfiles.
                   1609:      So delete them.  */
                   1610:   flush_tempfiles (start_tempcount);
                   1611:
                   1612:   /* Now merge the temporary files we created.  */
                   1613:
                   1614:   merge_files (tempfiles, ntemps, outfile);
                   1615:
                   1616:   free (tempfiles);
                   1617:
                   1618:   return value;
                   1619: }
                   1620: 
                   1621: /* Assume (and optionally verify) that each input file is sorted;
                   1622:    merge them and output the result.
                   1623:    Returns nonzero if any input file fails to be sorted.
                   1624:
                   1625:    This version of merging will not work if the number of
                   1626:    input files gets too high.  Higher level functions
                   1627:    use it only with a bounded number of input files.  */
                   1628:
                   1629: int
                   1630: merge_direct (infiles, nfiles, outfile)
                   1631:      char **infiles;
                   1632:      int nfiles;
                   1633:      char *outfile;
                   1634: {
                   1635:   struct linebuffer *lb1, *lb2;
                   1636:   struct linebuffer **thisline, **prevline;
                   1637:   FILE **streams;
                   1638:   int i;
                   1639:   int nleft;
                   1640:   int lossage = 0;
                   1641:   int *file_lossage;
                   1642:   struct linebuffer *prev_out = 0;
                   1643:   FILE *ostream = stdout;
                   1644:
                   1645:   if (outfile)
                   1646:     {
                   1647:       ostream = fopen (outfile, "w");
                   1648:     }
                   1649:   if (!ostream)
                   1650:     pfatal_with_name (outfile);
                   1651:
                   1652:   init_index ();
                   1653:
                   1654:   if (nfiles == 0)
                   1655:     {
                   1656:       if (outfile)
                   1657:        fclose (ostream);
                   1658:       return 0;
                   1659:     }
                   1660:
                   1661:   /* For each file, make two line buffers.
                   1662:      Also, for each file, there is an element of `thisline'
                   1663:      which points at any time to one of the file's two buffers,
                   1664:      and an element of `prevline' which points to the other buffer.
                   1665:      `thisline' is supposed to point to the next available line from the file,
                   1666:      while `prevline' holds the last file line used,
                   1667:      which is remembered so that we can verify that the file is properly sorted. */
                   1668:
                   1669:   /* lb1 and lb2 contain one buffer each per file. */
                   1670:   lb1 = (struct linebuffer *) xmalloc (nfiles * sizeof (struct linebuffer));
                   1671:   lb2 = (struct linebuffer *) xmalloc (nfiles * sizeof (struct linebuffer));
                   1672:
                   1673:   /* thisline[i] points to the linebuffer holding the next available line in file i,
                   1674:      or is zero if there are no lines left in that file.  */
                   1675:   thisline = (struct linebuffer **)
                   1676:     xmalloc (nfiles * sizeof (struct linebuffer *));
                   1677:   /* prevline[i] points to the linebuffer holding the last used line
                   1678:      from file i.  This is just for verifying that file i is properly
                   1679:      sorted.  */
                   1680:   prevline = (struct linebuffer **)
                   1681:     xmalloc (nfiles * sizeof (struct linebuffer *));
                   1682:   /* streams[i] holds the input stream for file i.  */
                   1683:   streams = (FILE **) xmalloc (nfiles * sizeof (FILE *));
                   1684:   /* file_lossage[i] is nonzero if we already know file i is not
                   1685:      properly sorted.  */
                   1686:   file_lossage = (int *) xmalloc (nfiles * sizeof (int));
                   1687:
                   1688:   /* Allocate and initialize all that storage. */
                   1689:
                   1690:   for (i = 0; i < nfiles; i++)
                   1691:     {
                   1692:       initbuffer (&lb1[i]);
                   1693:       initbuffer (&lb2[i]);
                   1694:       thisline[i] = &lb1[i];
                   1695:       prevline[i] = &lb2[i];
                   1696:       file_lossage[i] = 0;
                   1697:       streams[i] = fopen (infiles[i], "r");
                   1698:       if (!streams[i])
                   1699:        pfatal_with_name (infiles[i]);
                   1700:
                   1701:       readline (thisline[i], streams[i]);
                   1702:     }
                   1703:
                   1704:   /* Keep count of number of files not at eof. */
                   1705:   nleft = nfiles;
                   1706:
                   1707:   while (nleft)
                   1708:     {
                   1709:       struct linebuffer *best = 0;
                   1710:       struct linebuffer *exch;
                   1711:       int bestfile = -1;
                   1712:       int i;
                   1713:
                   1714:       /* Look at the next avail line of each file; choose the least one.  */
                   1715:
                   1716:       for (i = 0; i < nfiles; i++)
                   1717:        {
                   1718:          if (thisline[i] &&
                   1719:              (!best ||
                   1720:               0 < compare_general (best->buffer, thisline[i]->buffer,
                   1721:                                 (long) bestfile, (long) i, num_keyfields)))
                   1722:            {
                   1723:              best = thisline[i];
                   1724:              bestfile = i;
                   1725:            }
                   1726:        }
                   1727:
                   1728:       /* Output that line, unless it matches the previous one and we
                   1729:         don't want duplicates. */
                   1730:
                   1731:       if (!(prev_out &&
                   1732:            !compare_general (prev_out->buffer,
                   1733:                              best->buffer, 0L, 1L, num_keyfields - 1)))
                   1734:        indexify (best->buffer, ostream);
                   1735:       prev_out = best;
                   1736:
                   1737:       /* Now make the line the previous of its file, and fetch a new
                   1738:         line from that file.  */
                   1739:
                   1740:       exch = prevline[bestfile];
                   1741:       prevline[bestfile] = thisline[bestfile];
                   1742:       thisline[bestfile] = exch;
                   1743:
                   1744:       while (1)
                   1745:        {
                   1746:          /* If the file has no more, mark it empty. */
                   1747:
                   1748:          if (feof (streams[bestfile]))
                   1749:            {
                   1750:              thisline[bestfile] = 0;
                   1751:              /* Update the number of files still not empty. */
                   1752:              nleft--;
                   1753:              break;
                   1754:            }
                   1755:          readline (thisline[bestfile], streams[bestfile]);
                   1756:          if (thisline[bestfile]->buffer[0] || !feof (streams[bestfile]))
                   1757:            break;
                   1758:        }
                   1759:     }
                   1760:
                   1761:   finish_index (ostream);
                   1762:
                   1763:   /* Free all storage and close all input streams. */
                   1764:
                   1765:   for (i = 0; i < nfiles; i++)
                   1766:     {
                   1767:       fclose (streams[i]);
                   1768:       free (lb1[i].buffer);
                   1769:       free (lb2[i].buffer);
                   1770:     }
                   1771:   free (file_lossage);
                   1772:   free (lb1);
                   1773:   free (lb2);
                   1774:   free (thisline);
                   1775:   free (prevline);
                   1776:   free (streams);
                   1777:
                   1778:   if (outfile)
                   1779:     fclose (ostream);
                   1780:
                   1781:   return lossage;
                   1782: }
                   1783: 
                   1784: /* Print error message and exit.  */
                   1785:
                   1786: void
                   1787: fatal (format, arg)
                   1788:      char *format, *arg;
                   1789: {
                   1790:   error (format, arg);
                   1791:   exit (TI_FATAL_ERROR);
                   1792: }
                   1793:
                   1794: /* Print error message.  FORMAT is printf control string, ARG is arg for it. */
                   1795: void
                   1796: error (format, arg)
                   1797:      char *format, *arg;
                   1798: {
                   1799:   printf ("%s: ", program_name);
                   1800:   printf (format, arg);
                   1801:   if (format[strlen (format) -1] != '\n')
                   1802:     printf ("\n");
                   1803: }
                   1804:
                   1805: void
                   1806: perror_with_name (name)
                   1807:      char *name;
                   1808: {
                   1809:   char *s;
                   1810:
                   1811:   s = strerror (errno);
                   1812:   printf ("%s: ", program_name);
                   1813:   printf ("%s; for file `%s'.\n", s, name);
                   1814: }
                   1815:
                   1816: void
                   1817: pfatal_with_name (name)
                   1818:      char *name;
                   1819: {
                   1820:   char *s;
                   1821:
                   1822:   s = strerror (errno);
                   1823:   printf ("%s: ", program_name);
                   1824:   printf ("%s; for file `%s'.\n", s, name);
                   1825:   exit (TI_FATAL_ERROR);
                   1826: }
                   1827:
                   1828: /* Return a newly-allocated string whose contents concatenate those of
                   1829:    S1, S2, S3.  */
                   1830:
                   1831: char *
                   1832: concat (s1, s2, s3)
                   1833:      char *s1, *s2, *s3;
                   1834: {
                   1835:   int len1 = strlen (s1), len2 = strlen (s2), len3 = strlen (s3);
                   1836:   char *result = (char *) xmalloc (len1 + len2 + len3 + 1);
                   1837:
                   1838:   strcpy (result, s1);
                   1839:   strcpy (result + len1, s2);
                   1840:   strcpy (result + len1 + len2, s3);
                   1841:   *(result + len1 + len2 + len3) = 0;
                   1842:
                   1843:   return result;
                   1844: }
                   1845:
                   1846: #if !defined (HAVE_STRERROR)
                   1847: extern char *sys_errlist[];
                   1848: extern int sys_nerr;
                   1849:
                   1850: char *
                   1851: strerror (num)
                   1852:      int num;
                   1853: {
                   1854:   if (num >= sys_nerr)
                   1855:     return ("");
                   1856:   else
                   1857:     return (sys_errlist[num]);
                   1858: }
                   1859: #endif /* !HAVE_STRERROR */
                   1860:
                   1861: #if !defined (HAVE_STRCHR)
                   1862: char *
                   1863: strrchr (string, character)
                   1864:      char *string;
                   1865:      int character;
                   1866: {
                   1867:   register int i;
                   1868:
                   1869:   for (i = strlen (string) - 1; i > -1; i--)
                   1870:     if (string[i] == character)
                   1871:       return (string + i);
                   1872:
                   1873:   return ((char *)NULL);
                   1874: }
                   1875: #endif /* HAVE_STRCHR */
                   1876:
                   1877: /* Just like malloc, but kills the program in case of fatal error. */
                   1878: void *
                   1879: xmalloc (nbytes)
                   1880:      int nbytes;
                   1881: {
                   1882:   void *temp = (void *) malloc (nbytes);
                   1883:
                   1884:   if (nbytes && temp == (void *)NULL)
                   1885:     memory_error ("xmalloc", nbytes);
                   1886:
                   1887:   return (temp);
                   1888: }
                   1889:
                   1890: /* Like realloc (), but barfs if there isn't enough memory. */
                   1891: void *
                   1892: xrealloc (pointer, nbytes)
                   1893:      void *pointer;
                   1894:      int nbytes;
                   1895: {
                   1896:   void *temp;
                   1897:
                   1898:   if (!pointer)
                   1899:     temp = (void *)xmalloc (nbytes);
                   1900:   else
                   1901:     temp = (void *)realloc (pointer, nbytes);
                   1902:
                   1903:   if (nbytes && !temp)
                   1904:     memory_error ("xrealloc", nbytes);
                   1905:
                   1906:   return (temp);
                   1907: }
                   1908:
                   1909: void
                   1910: memory_error (callers_name, bytes_wanted)
                   1911:      char *callers_name;
                   1912:      int bytes_wanted;
                   1913: {
                   1914:   char printable_string[80];
                   1915:
                   1916:   sprintf (printable_string,
                   1917:           "Virtual memory exhausted in %s ()!  Needed %d bytes.",
                   1918:           callers_name, bytes_wanted);
                   1919:
                   1920:   error (printable_string);
                   1921:   abort();
                   1922: }
                   1923:
                   1924: 
                   1925: static int
                   1926: wstrncmp(const u_short *s1, const u_short *s2, size_t n)
                   1927:     {
                   1928:     int            i;
                   1929:     int            d = 0;
                   1930:
                   1931:     for (i=0; i<n && d == 0; i++)
                   1932:        d   = s1[i] - s2[i];
                   1933:
                   1934:     return  d;
                   1935:     }
                   1936:
                   1937:
                   1938: static int
                   1939: wfwrite(u_short *ptr, size_t  size,  size_t  nmemb,  FILE*stream)
                   1940:     {
                   1941:     char    *cptr;
                   1942:     int            ret, len;
                   1943:
                   1944:     cptr    = (char *)xmalloc(size * nmemb * sizeof(u_short)); /* 丼一杯! */
                   1945:     len        = wtoc(cptr, ptr, size*nmemb);
                   1946:     assert(len <= (size * nmemb * sizeof(u_short)));
                   1947:
                   1948:     ret        = fwrite(cptr, len, nmemb, stream);
                   1949:     free(cptr);
                   1950:     return  ret;
                   1951:     }
                   1952:
                   1953:
                   1954: static int
                   1955: wtoc(char *cptr, u_short const *ptr, int len)
                   1956:     {
                   1957:     char       *tptr;
                   1958:     u_short        c;
                   1959:     int                    i;
                   1960:
                   1961:     tptr    = cptr;
                   1962:     for (i = 0; i < len; i++)
                   1963:        {
                   1964:        c   = ptr[i];
                   1965:        if (G1_CHAR(c))
                   1966:            {
                   1967:            *tptr++ = c >> 8 | 0x0080;  /* to EUC */
                   1968:            c   &= 0x007F;
                   1969:            c   |= 0x0080;
                   1970:            }
                   1971:        *tptr++ = c;
                   1972:        }
                   1973:
                   1974:     return  tptr - cptr;
                   1975:     }
                   1976:
                   1977:
                   1978: static int
                   1979: Vowel_of(int c)
                   1980:     {
                   1981:     if (0x2521 <= c && c < 0x2577)             /* 片仮名は */
                   1982:        c   -= 0x100;                           /* 平仮名へ */
                   1983:     /* ヴ、ヵ、ヶ、の3字は相当する平仮名はないが、コードがあると仮定する */
                   1984:     /* 普通は問題にならない筈。        */
                   1985:
                   1986:     if (0x2421 <= c && c < 0x2474)
                   1987:        return  Vowel_Table[c - 0x2420];
                   1988:     else
                   1989:        return  c;                              /* 平仮名以外はそのまま */
                   1990:     }
                   1991:
                   1992:
                   1993: static void
                   1994: init_Vowel_Table(void)
                   1995:     {
                   1996:     int            i;
                   1997:
                   1998:     /* あ〜ぞ */
                   1999:     for (i = 0x2421 - 0x2420; i < 0x2445 - 0x2420; i += 10)
                   2000:        {
                   2001:        Vowel_Table[i]      = Vowel_Table[i+1]    = 0x2422; /* あ段 */
                   2002:        Vowel_Table[i+2]    = Vowel_Table[i+3]    = 0x2424; /* い段 */
                   2003:        Vowel_Table[i+4]    = Vowel_Table[i+5]    = 0x2426; /* う段 */
                   2004:        Vowel_Table[i+6]    = Vowel_Table[i+7]    = 0x2428; /* え段 */
                   2005:        Vowel_Table[i+8]    = Vowel_Table[i+9]    = 0x242A; /* お段 */
                   2006:        }
                   2007:     Vowel_Table[0x243F - 0x2420]    = 0x2422;  /* た */
                   2008:     Vowel_Table[0x2440 - 0x2420]    = 0x2422;  /* だ */
                   2009:     Vowel_Table[0x2441 - 0x2420]    = 0x2424;  /* ち */
                   2010:     Vowel_Table[0x2442 - 0x2420]    = 0x2424;  /* ぢ */
                   2011:     Vowel_Table[0x2443 - 0x2420]    = 0x2426;  /* っ */
                   2012:     Vowel_Table[0x2444 - 0x2420]    = 0x2426;  /* つ */
                   2013:     Vowel_Table[0x2445 - 0x2420]    = 0x2426;  /* づ */
                   2014:     Vowel_Table[0x2446 - 0x2420]    = 0x2428;  /* て */
                   2015:     Vowel_Table[0x2447 - 0x2420]    = 0x2428;  /* で */
                   2016:     Vowel_Table[0x2448 - 0x2420]    = 0x242A;  /* と */
                   2017:     Vowel_Table[0x2449 - 0x2420]    = 0x242A;  /* ど */
                   2018:     Vowel_Table[0x244A - 0x2420]    = 0x2422;  /* な */
                   2019:     Vowel_Table[0x244B - 0x2420]    = 0x2424;  /* に */
                   2020:     Vowel_Table[0x244C - 0x2420]    = 0x2426;  /* ぬ */
                   2021:     Vowel_Table[0x244D - 0x2420]    = 0x2428;  /* ね */
                   2022:     Vowel_Table[0x244E - 0x2420]    = 0x242A;  /* の */
                   2023:     /* は〜ぽ */
                   2024:     for (i = 0x244F - 0x2420; i < 0x245E - 0x2420; i += 15)
                   2025:        {
                   2026:        Vowel_Table[i]      = Vowel_Table[i+1]  = Vowel_Table[i+2]  = 0x2422;
                   2027:        Vowel_Table[i+3]    = Vowel_Table[i+4]  = Vowel_Table[i+5]  = 0x2424;
                   2028:        Vowel_Table[i+6]    = Vowel_Table[i+7]  = Vowel_Table[i+8]  = 0x2426;
                   2029:        Vowel_Table[i+9]    = Vowel_Table[i+11] = Vowel_Table[i+12] = 0x2428;
                   2030:        Vowel_Table[i+12]   = Vowel_Table[i+13] = Vowel_Table[i+14] = 0x242A;
                   2031:        }
                   2032:     Vowel_Table[0x245E - 0x2420]    = 0x2422;  /* ま */
                   2033:     Vowel_Table[0x245F - 0x2420]    = 0x2424;  /* み */
                   2034:     Vowel_Table[0x2460 - 0x2420]    = 0x2426;  /* む */
                   2035:     Vowel_Table[0x2461 - 0x2420]    = 0x2428;  /* め */
                   2036:     Vowel_Table[0x2462 - 0x2420]    = 0x242A;  /* も */
                   2037:     Vowel_Table[0x2463 - 0x2420]    = 0x2422;  /* ゃ */
                   2038:     Vowel_Table[0x2464 - 0x2420]    = 0x2422;  /* や */
                   2039:     Vowel_Table[0x2465 - 0x2420]    = 0x2426;  /* ゅ */
                   2040:     Vowel_Table[0x2466 - 0x2420]    = 0x2426;  /* ゆ */
                   2041:     Vowel_Table[0x2467 - 0x2420]    = 0x242A;  /* ょ */
                   2042:     Vowel_Table[0x2468 - 0x2420]    = 0x242A;  /* よ */
                   2043:     Vowel_Table[0x2469 - 0x2420]    = 0x2422;  /* ら */
                   2044:     Vowel_Table[0x246A - 0x2420]    = 0x2424;  /* り */
                   2045:     Vowel_Table[0x246B - 0x2420]    = 0x2426;  /* る */
                   2046:     Vowel_Table[0x246C - 0x2420]    = 0x2428;  /* れ */
                   2047:     Vowel_Table[0x246D - 0x2420]    = 0x242A;  /* ろ */
                   2048:     Vowel_Table[0x246E - 0x2420]    = 0x2422;  /* ゎ */
                   2049:     Vowel_Table[0x246F - 0x2420]    = 0x2422;  /* わ */
                   2050:     /* ゐ〜ヶ は、そのまま。母音への変換はしない。 */
                   2051:     for (i = 0x2470 - 0x2420; i < 0x2477 - 0x2420; i++)
                   2052:        Vowel_Table[i]  = i;
                   2053:     }

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>