Path: kddlab!cs.titech!wnoc-tyo-news!sh.wide!wnoc-kyo!oskgate0.mei!synapse!kgway!manmos From: manmos@knowledge.co.jp (Hideo "Sir MaNMOS" Morisita) Newsgroups: fj.sources Subject: Japanese extention for texindex Message-ID: Date: 13 Jul 93 02:35:02 GMT Sender: manmos@kgway.knowledge.co.jp Followup-To: fj.sources.d Distribution: fj Organization: Knowledge. Co., Osaka, Japan Lines: 643 森下 お代官様 MaNMOS@ナレッジです。 以前、texinfoの日本語拡張がfjに流れましたが、それ用のtexindexは流れな かったようですので、作ってみました。 texindex.c 1.45の拡張として作ってあります。(texindex.c 1.45は texinfo-2.15 から texinfo-3.0まで同じものです) 使用方法は patch < このファイル とし、Makefileの DEFSの所に -DEUC or -DSJISを定義してコンパイルして下 さい。 インデックス順は ASCII記号->ASCII & JIS208 数字 -> ASCII & JIS208 アルファベット -> JIS201 & JIS208 仮名 になっています。 見出しは、仮名1文字づつになっています。(あ行か行とかでない、そうする のもテーブルを作れば難しくはないですが…) --------------------------------(cur me)----------------------------- *** texindex.c Sat Dec 12 01:19:49 1992 --- jtexindex.c Tue Jul 13 11:15:17 1993 *************** *** 1,6 **** /* Prepare TeX index dribble output into an actual index. ! Version 1.45 Copyright (C) 1987, 1991, 1992 Free Software Foundation, Inc. --- 1,6 ---- /* Prepare TeX index dribble output into an actual index. ! Version 1.45-j1.00 Copyright (C) 1987, 1991, 1992 Free Software Foundation, Inc. *************** *** 607,612 **** --- 607,618 ---- while (bracelevel) { c = *p++; + #ifdef SJIS + if ( iskanji(c)) { + p++; + continue; + } + #endif if (c == '{') bracelevel++; if (c == '}') *************** *** 650,655 **** --- 656,667 ---- while (bracelevel) { c = *p++; + #ifdef SJIS + if (iskanji(c)) { + p++; + continue; + } + #endif if (c == '{') bracelevel++; if (c == '}') *************** *** 678,685 **** /* Vector used to translate characters for comparison. This is how we make all alphanumerics follow all else, and ignore case in the first sorting. */ ! int char_order[256]; void init_char_order () { --- 690,1071 ---- /* Vector used to translate characters for comparison. 
This is how we make all alphanumerics follow all else, and ignore case in the first sorting. */ ! int char_order[0x10000]; /* japanese: enlarged so it can be indexed by a two-byte character code */ ! /* KANJI is defined whenever one of the encodings EUC or SJIS is selected. */ ! #ifdef EUC ! #define KANJI ! #endif ! ! #ifdef SJIS ! #define KANJI ! #endif ! ! #ifdef KANJI ! ! #ifndef EUC ! # ifndef SJIS ! # define EUC ! # endif ! #else ! # ifdef SJIS ! # error EUC and SJIS must not both be defined ! # endif ! #endif ! /* Two-byte character codes used by the sorting code below, EUC encoding. */ ! #ifdef EUC ! #define JIS_NUM_0 0xa3b0 ! #define JIS_NUM_9 (JIS_NUM_0 + '9'- '0') ! #define JIS_ALPH_A 0xa3c1 ! #define JIS_ALPH_Z (JIS_ALPH_A + 'Z'-'A') ! #define JIS_ALPH_a 0xa3e1 ! #define JIS_HIRA_SMALL_A 0xa4a1 ! #define JIS_HIRA_NN 0xa4f3 ! #define JIS_KATA_SMALL_A 0xa5a1 ! #define JIS_KATA_SMALL_KE 0xa5f6 ! #define JIS_HIRA_U 0xa4a6 ! #define JIS_KATA_U 0xa5a6 ! #define JIS_KATA_VU 0xa5f4 ! ! #define JIS_KATA_UPPER 0xa5 ! #define JIS_HIRA_UPPER 0xa4 ! ! #define JIS_HIRA_A 0xa4a2 ! #define JIS_KATA_A 0xa5a2 ! ! #define JIS_DAKUTEN 0xa1ab ! #define JIS_HANDAKUTEN 0xa1ac ! #define JIS_CHOUON 0xa1bc /* prolonged sound mark; same code as the '-' entry of kana[] (was 0xa2ac) */ ! /* kana[b - 0xa0] is the two-byte code for the one-byte kana b, 0xa0 <= b <= 0xdf. */ ! int kana[] = { ! /* sp . [ ] , . wo a */ ! 0xa1a1, 0xa1a3, 0xa1d6, 0xa1d7, 0xa1a2, 0xa1a6, 0xa5f2, 0xa5a1, ! /* i u e o ya yu yo tsu */ ! 0xa5a3, 0xa5a5, 0xa5a7, 0xa5a9, 0xa5e3, 0xa5e5, 0xa5e7, 0xa5c3, ! /* - a i u e o ka ki */ ! 0xa1bc, 0xa5a2, 0xa5a4, 0xa5a6, 0xa5a8, 0xa5aa, 0xa5ab, 0xa5ad, ! /* ku ke ko sa si su se so */ ! 0xa5af, 0xa5b1, 0xa5b3, 0xa5b5, 0xa5b7, 0xa5b9, 0xa5bb, 0xa5bd, ! /* ta ti tu te to na ni nu */ ! 0xa5bf, 0xa5c1, 0xa5c4, 0xa5c6, 0xa5c8, 0xa5ca, 0xa5cb, 0xa5cc, ! /* ne no ha hi hu he ho ma */ ! 0xa5cd, 0xa5ce, 0xa5cf, 0xa5d2, 0xa5d5, 0xa5d8, 0xa5db, 0xa5de, ! /* mi mu me mo ya yu yo ra */ ! 0xa5df, 0xa5e0, 0xa5e1, 0xa5e2, 0xa5e4, 0xa5e6, 0xa5e8, 0xa5e9, ! /* ri ru re ro wa nn "" maru */ ! 0xa5ea, 0xa5eb, 0xa5ec, 0xa5ed, 0xa5ef, 0xa5f3, JIS_DAKUTEN,JIS_HANDAKUTEN ! }; ! /* Voiced katakana, five per row, in the order convert_htoz() indexes them. */ ! int daku[] = {0xa5ac, 0xa5ae, 0xa5b0, 0xa5b2, 0xa5b4, /* ga row */ ! 0xa5b6, 0xa5b8, 0xa5ba, 0xa5bc, 0xa5be, /* za row */ ! 0xa5c0, 0xa5c2, 0xa5c5, 0xa5c7, 0xa5c9, /* da row */ ! 0xa5d0, 0xa5d3, 0xa5d6, 0xa5d9, 0xa5dc}; /* ba row */ !
int handaku[] = {0xa5d1, 0xa5d4, 0xa5d7, 0xa5da, 0xa5dd}; /* pa row (katakana) */ ! int dakuall[] = {0xa4ac, 0xa4ae, 0xa4b0, 0xa4b2, 0xa4b4, /* hiragana ga row */ ! 0xa4b6, 0xa4b8, 0xa4ba, 0xa4bc, 0xa4be, /* hiragana za row */ ! 0xa4c0, 0xa4c2, 0xa4c5, 0xa4c7, 0xa4c9, /* hiragana da row */ ! 0xa4d0, 0xa4d3, 0xa4d6, 0xa4d9, 0xa4dc, /* hiragana ba row */ ! 0xa5ac, 0xa5ae, 0xa5b0, 0xa5b2, 0xa5b4, /* katakana ga row */ ! 0xa5b6, 0xa5b8, 0xa5ba, 0xa5bc, 0xa5be, /* katakana za row */ ! 0xa5c0, 0xa5c2, 0xa5c5, 0xa5c7, 0xa5c9, /* katakana da row */ ! 0xa5d0, 0xa5d3, 0xa5d6, 0xa5d9, 0xa5dc}; /* katakana ba row */ ! int handakuall[] = {0xa4d1, 0xa4d4, 0xa4d7, 0xa4da, 0xa4dd, /* hiragana pa row */ ! 0xa5d1, 0xa5d4, 0xa5d7, 0xa5da, 0xa5dd}; /* katakana pa row */ ! ! /* Return 1 if byte C has bit 0x80 set and is not 0x8e, else 0. */ ! iskanji(c) /* japanese extension */ ! int c; ! { ! if ( !(c & 0x80 )) return 0; ! if ( c == 0x8e ) return 0; ! return 1; ! } ! /* Return 1 if byte C is 0x8e, the byte convert_htoz() skips in front of a one-byte kana. */ ! is201kana(c) /* japanese extension */ ! int c; ! { ! if ( c == 0x8e ) return 1; ! else return 0; ! } ! #else /* SJIS */ ! #define JIS_NUM_0 0x824f ! #define JIS_NUM_9 (JIS_NUM_0 + '9'- '0') ! #define JIS_ALPH_A 0x8260 ! #define JIS_ALPH_Z (JIS_ALPH_A + 'Z'-'A') ! #define JIS_ALPH_a 0x8281 ! #define JIS_HIRA_SMALL_A 0x829f ! #define JIS_HIRA_NN 0x82f1 ! #define JIS_KATA_SMALL_A 0x8340 ! #define JIS_KATA_SMALL_KE 0x8396 ! #define JIS_HIRA_U 0x82a4 ! #define JIS_KATA_U 0x8345 /* same code as the 'u' entry of kana[] (was 0x83a5) */ ! #define JIS_KATA_VU 0x8394 ! ! #define JIS_KATA_UPPER 0x83 ! #define JIS_HIRA_UPPER 0x82 ! ! #define JIS_HIRA_A 0x82a0 ! #define JIS_KATA_A 0x8341 ! ! #define JIS_DAKUTEN 0x814a ! #define JIS_HANDAKUTEN 0x814b ! #define JIS_CHOUON 0x815b ! /* kana[b - 0xa0] is the two-byte code for the one-byte kana b, 0xa0 <= b <= 0xdf. */ ! int kana[] = { ! /* sp . [ ] , . wo a */ ! 0x8140, 0x8142, 0x8175, 0x8176, 0x8141, 0x8145, 0x8392, 0x8340, ! /* i u e o ya yu yo tsu */ ! 0x8342, 0x8344, 0x8346, 0x8348, 0x8383, 0x8385, 0x8387, 0x8362, ! /* - a i u e o ka ki */ ! 0x815b, 0x8341, 0x8343, 0x8345, 0x8347, 0x8349, 0x834a, 0x834c, ! /* ku ke ko sa si su se so */ ! 0x834e, 0x8350, 0x8352, 0x8354, 0x8356, 0x8358, 0x835a, 0x835c, ! /* ta ti tu te to na ni nu */ ! 0x835e, 0x8360, 0x8363, 0x8365, 0x8367, 0x8369, 0x836a, 0x836b, ! /* ne no ha hi hu he ho ma */ !
0x836c, 0x836d, 0x836e, 0x8371, 0x8374, 0x8377, 0x837a, 0x837d, ! /* mi mu me mo ya yu yo ra */ ! 0x837e, 0x8380, 0x8381, 0x8382, 0x8384, 0x8386, 0x8388, 0x8389, ! /* ri ru re ro wa nn "" maru */ ! 0x838a, 0x838b, 0x838c, 0x838d, 0x838f, 0x8393, JIS_DAKUTEN,JIS_HANDAKUTEN ! }; ! /* Voiced katakana, five per row, in the order convert_htoz() indexes them. */ ! int daku[] = {0x834b, 0x834d, 0x834f, 0x8351, 0x8353, /* ga row */ ! 0x8355, 0x8357, 0x8359, 0x835b, 0x835d, /* za row */ ! 0x835f, 0x8361, 0x8364, 0x8366, 0x8368, /* da row; "do" is 0x8368, 0x8367 is the unvoiced "to" of kana[] */ ! 0x836f, 0x8372, 0x8375, 0x8378, 0x837b}; /* ba row */ ! int handaku[] = {0x8370, 0x8373, 0x8376, 0x8379, 0x837c}; /* pa row (katakana) */ ! int dakuall[] = {0x82aa, 0x82ac, 0x82ae, 0x82b0, 0x82b2, /* hiragana ga row (gi and gu were 0xa2ac, 0x82a3) */ ! 0x82b4, 0x82b6, 0x82b8, 0x82ba, 0x82bc, /* hiragana za row */ ! 0x82be, 0x82c0, 0x82c3, 0x82c5, 0x82c7, /* hiragana da row */ ! 0x82ce, 0x82d1, 0x82d4, 0x82d7, 0x82da, /* hiragana ba row */ ! ! 0x834b, 0x834d, 0x834f, 0x8351, 0x8353, /* katakana ga row */ ! 0x8355, 0x8357, 0x8359, 0x835b, 0x835d, /* katakana za row */ ! 0x835f, 0x8361, 0x8364, 0x8366, 0x8368, /* katakana da row */ ! 0x836f, 0x8372, 0x8375, 0x8378, 0x837b}; /* katakana ba row */ ! int handakuall[] = {0x82cf, 0x82d2, 0x82d5, 0x82d8, 0x82db, /* hiragana pa row */ ! 0x8370, 0x8373, 0x8376, 0x8379, 0x837c}; /* katakana pa row */ ! /* Return 1 if the low byte of C lies in 0x81..0x9f or 0xe0..0xea, else 0. */ ! iskanji(c) /* japanese extension */ ! int c; ! { ! c &= 0xff; ! if (( c >= 0x81 && c <= 0x9f ) || ! ( c >= 0xe0 && c <= 0xea )) return 1; ! return 0; ! } ! /* Return 1 if C lies in 0xa0..0xdf, the range indexed into kana[], else 0. */ ! is201kana(c) /* japanese extension */ ! int c; ! { ! if ( c >= 0xa0 && c <= 0xdf ) return 1; ! else return 0; ! } ! #endif ! ! /* Fill char_order[] for two-byte codes: identity by default, then full-width digits and letters get the order of their ASCII counterparts, and katakana gets the order of the matching hiragana. */ ! kana_char_order() /* japanese */ ! { ! unsigned int c,i,cc; ! ! for ( c = 0x8000 ; c < 0x10000 ; c++ ) ! char_order[c] = c; ! ! /* NUMERIC */ ! for ( c = JIS_NUM_0 , i = '0' + 512; c <= JIS_NUM_9 ; i++,c++) ! char_order[c] = i; ! ! /* ALPHABET */ ! for ( c = JIS_ALPH_A , i = 'a' + 512; c <= JIS_ALPH_Z ; i++,c++) { ! char_order[c] = i; ! char_order[c+JIS_ALPH_a - JIS_ALPH_A] = i; ! } ! ! /* KANA */ ! for ( c = JIS_HIRA_SMALL_A ; c <= JIS_HIRA_NN ; c++ ) { ! char_order[c] = c+512; ! cc = c+JIS_KATA_SMALL_A-JIS_HIRA_SMALL_A; ! #ifdef SJIS ! if ( cc > 0x837e ) cc ++; ! #endif !
char_order[cc] = c+512; ! if (isdaku(c) ) { /* voiced kana: same order as the kana one code below it */ ! char_order[c] = c+512-1; ! char_order[cc] = c+512-1; ! } else if (ishandaku(c) ) { /* semi-voiced kana: same order as the kana two codes below it */ ! char_order[c] = c+512-2; ! char_order[cc] = c+512-2; ! } ! } ! for ( c = JIS_KATA_VU ; c <= JIS_KATA_SMALL_KE ; c++ ) { ! char_order[c] = c+512; ! if (c == JIS_KATA_VU ) { /* VU sorts with U, whose order the loop above derived from hiragana U */ ! char_order[c] = JIS_HIRA_U + 512; ! } ! } ! } ! ! /* KANA_BYTES: number of input bytes occupied by one one-byte kana. */ ! #ifdef EUC ! #define KANA_BYTES 2 /* the skipped prefix byte plus the kana byte */ ! #else ! #define KANA_BYTES 1 /* SJIS: the kana byte alone (was 2, which skipped a byte) */ ! #endif + static convert_htoz(str,ret) + unsigned char *str; + int *ret; + { + int c,c2; + /* Convert the one-byte kana at STR, together with a directly following one-byte dakuten, handakuten or prolonged sound mark, to a two-byte code stored in *RET; return the number of bytes consumed. */ + int num = KANA_BYTES; + + #ifdef EUC + str++; + #endif + c = *str++; + if (!is201kana(*str)) { + if (c >= 0xa0 && c <= 0xdf) + *ret = kana[c - 0xa0]; + else *ret = 0x8e00 + c; + return num; + } + #ifdef EUC + str++; + #endif + num += KANA_BYTES; + + if ( (c2 = *str++) == 0xde) { /* dakuten (voiced sound mark) */ + if (c >= 0xb6 && c <= 0xba) /* line-ga */ + c = daku[c - 0xb6]; + else if (c >= 0xbb && c <= 0xbf) /* line-za */ + c = daku[c - 0xbb+5]; + else if (c >= 0xc0 && c <= 0xc4) /* line-da */ + c = daku[c - 0xc0+10]; + else if (c >= 0xca && c <= 0xce) /* line-ba */ + c = daku[c - 0xca+15]; + else if ( c == 0xb3 ) /* vu */ + c = JIS_KATA_VU; + } else if (c2 == 0xdf) { /* handakuten (semi-voiced sound mark) */ + if (c >= 0xca && c <= 0xce) /* line-pa */ + c = handaku[c - 0xca]; + } else if ( c2 != 0xb0 ) { /* not the prolonged sound mark (the '-' entry, kana[0x10]) either */ + num -= KANA_BYTES; /* an ordinary kana follows: leave it for the next call */ + } + if (c >= 0xa0 && c <= 0xdf) c = kana[c - 0xa0]; /* base kana was not combined with a mark */ + else if ( c < 0x100 ) c += 0x8e00; + *ret = c; /* every path has to store a result */ + + return num; + } + /* Return 1 if C followed by a dakuten forms a voiced kana, i.e. C+1 is listed in dakuall[]. */ + isadddaku(c) + { + int i; + for ( i = 0 ; i < sizeof(dakuall) / sizeof(dakuall[1]) ; i++ ) + if ( dakuall[i]-1 == c ) return 1; + return 0; + } + /* Return 1 if C followed by a handakuten forms a semi-voiced kana, i.e. C+2 is listed in handakuall[]. */ + isaddhandaku(c) + { + int i; + for ( i = 0 ; i < sizeof(handakuall) / sizeof(handakuall[1]) ; i++ ) + if ( handakuall[i]-2 == c ) return 1; + return 0; + } + /* Return 1 if C is listed in dakuall[] (a voiced kana). */ + isdaku(c) + { + int i; + for ( i = 0 ; i < sizeof(dakuall) / sizeof(dakuall[1]) ; i++ ) + if ( dakuall[i] == c ) return 1; + return 0; + } + /* Return 1 if C is listed in handakuall[] (a semi-voiced kana). */ + ishandaku(c) + { + int i; + for ( i = 0 ; i < sizeof(handakuall) / sizeof(handakuall[1]) ; i++ ) + if ( handakuall[i]
== c ) return 1; + return 0; + } + /* Decode the character at STR into *RET and return the number of bytes consumed: 1 for a single-byte character, 2 for a two-byte character, 4 when a following two-byte dakuten, handakuten or prolonged sound mark is absorbed; one-byte kana are handed to convert_htoz(). */ + int mbchar(str,ret) + unsigned char *str; + int *ret; + { + int c = *str++; + int cc,ccc; + if ( !iskanji(c) && !is201kana(c)) { + *ret = c & 0xff; + return 1; + } + if ( iskanji (c) ) { + cc = ((c & 0xff) << 8) | ((*str++) & 0xff); + ccc = ((cc & 0xff) && str[0]) ? ((str[0] & 0xff) << 8 ) | (str[1] & 0xff) : 0; /* peek at the next character without reading past a NUL; the old form modified str twice in one expression */ + + if ( ccc == JIS_DAKUTEN ) { + if ( isadddaku(cc)) cc++; + else if ( cc == JIS_KATA_U || cc == JIS_HIRA_U ) + cc = JIS_KATA_VU; + *ret = cc; + return 4; + } else if ( ccc == JIS_HANDAKUTEN ) { + if ( isaddhandaku(cc)) cc+=2; + *ret = cc; + return 4; + } else if ( ccc == JIS_CHOUON ) { + *ret = cc; + return 4; + } else { + *ret = cc; + return 2; + } + } else { /* is201kana(c): the first test above leaves no other case, so no path falls off the end */ + /* one-byte kana */ + return convert_htoz(str-1,ret); + } + } + /* Store at STR the NUL-terminated two-byte index heading for character C: voicing is stripped and katakana is replaced by the matching hiragana. */ + xinitial(str,c) + unsigned char *str; + int c; + { + int up; + if (isdaku(c)) c--; + else if (ishandaku(c)) c-=2; + else if ( c == JIS_KATA_VU ) c = JIS_KATA_U; + + if ((up= (c & 0xff00) >>8) == JIS_KATA_UPPER && c <= JIS_KATA_SMALL_KE ) { /* katakana */ + #ifdef SJIS + if ( c > 0x837e ) c--; /* undo the skip over 0x837f that kana_char_order() applies, before shifting */ + #endif + c += JIS_HIRA_SMALL_A - JIS_KATA_SMALL_A; + } + *str++ = (c >> 8 ) & 0xff; + *str++ = c & 0xff; + *str= 0; + } + /* Write the index heading for the first character of SRC into DST (3 bytes are written) and return its length: 1 for a single-byte character, otherwise 2. */ + make_initial(src,dst) + unsigned char *src,*dst; + { + int len; + int c; + + len = mbchar(src,&c); + if ( c < 0x100 ) { /* single-byte character; test the code, not the byte count */ + dst[0] =c; + dst[1] = dst[2] = 0; /* the caller copies all three bytes */ + } else { + xinitial(dst,c); + len = 2; + } + return len; + } + #endif + void init_char_order () { *************** *** 695,700 **** --- 1081,1089 ---- char_order[i] = 512 + i; char_order[i + 'A' - 'a'] = 512 + i; } + #ifdef KANJI + kana_char_order(); /* japanese */ + #endif } /* Compare two fields (each specified as a start pointer and a character count) *************** *** 741,751 **** if (p1 == e1) c1 = 0; else c1 = *p1++; if (p2 == e2) c2 = 0; else !
c2 = *p2++; if (char_order[c1] != char_order[c2]) return char_order[c1] - char_order[c2]; --- 1130,1149 ---- if (p1 == e1) c1 = 0; else + #ifdef KANJI + p1 += mbchar(p1,&c1); /* japanese */ + #else c1 = *p1++; + #endif + if (p2 == e2) c2 = 0; else ! #ifdef KANJI ! p2 += mbchar(p2,&c2); /* japanese */ ! #else ! c2 = *p2++; ! #endif if (char_order[c1] != char_order[c2]) return char_order[c1] - char_order[c2]; *************** *** 763,777 **** if (p1 == e1) c1 = 0; else c1 = *p1++; if (p2 == e2) c2 = 0; else ! c2 = *p2++; ! ! if (c1 != c2) ! /* Reverse sign here so upper case comes out last. */ ! return c2 - c1; if (!c1) break; } --- 1161,1194 ---- if (p1 == e1) c1 = 0; else + #ifdef KANJI + p1 += mbchar(p1,&c1); /* japanese */ + #else c1 = *p1++; + #endif if (p2 == e2) c2 = 0; else ! #ifdef KANJI ! p2 += mbchar(p2,&c2); /* japanese */ ! #else ! c2 = *p2++; ! #endif ! #ifdef KANJI ! if ( iskanji(c1) || is201kana(c1)) { ! if (c1 != c2) ! /* Reverse sign here so upper case comes out last. */ ! return c2 - c1; ! } else { ! if (c1 != c2) ! return c1 - c2; ! } ! #else ! ! if (c1 != c2) ! /* Reverse sign here so upper case comes out last. */ ! return c2 - c1; ! #endif if (!c1) break; } *************** *** 1135,1141 **** /* When we need a string of length 1 for the value of lastinitial, store it here. */ ! char lastinitial1[2]; /* Initialize static storage for writing an index. */ --- 1552,1558 ---- /* When we need a string of length 1 for the value of lastinitial, store it here. */ ! char lastinitial1[3]; /* japanese */ /* Initialize static storage for writing an index. */ *************** *** 1168,1174 **** int nosecondary; int initiallength; char *initial; ! char initial1[2]; register char *p; /* First, analyze the parts of the entry fed to us this time. */ --- 1585,1591 ---- int nosecondary; int initiallength; char *initial; ! char initial1[3]; /* japanese */ register char *p; /* First, analyze the parts of the entry fed to us this time. 
*/ *************** *** 1184,1193 **** else { initial = initial1; initial1[0] = *p; initial1[1] = 0; initiallength = 1; ! if (initial1[0] >= 'a' && initial1[0] <= 'z') initial1[0] -= 040; } --- 1601,1615 ---- else { initial = initial1; + #ifdef KANJI + initiallength = make_initial(p,initial1); /* japanese */ + #else + initial = initial1; initial1[0] = *p; initial1[1] = 0; initiallength = 1; ! #endif ! if (initial1[0] >= 'a' && initial1[0] <= 'z') initial1[0] -= 040; } *************** *** 1227,1232 **** --- 1649,1656 ---- { lastinitial = lastinitial1; *lastinitial1 = *initial1; + lastinitial1[1] = initial1[1]; + lastinitial1[2] = initial1[2]; } else { --------------------------------(cur me)----------------------------- -- ___ わしは、山吹色のかすてーらが大好きでのぅ [[o o]] ふぉっふぉっふぉ 'J' 森下 お代官様 MaNMOS 英夫@knowledge