// SPDX-License-Identifier: MIT
// SPDX-FileCopyrightText: 2023 SASANO Takayoshi

#include <stdio.h>
#include <wchar.h>
#include <locale.h>

#define MAX_TOKENS	65535	// should be odd value
#define CONV_BUFSIZE	65536

/*
 * Line converter: splits one input line into tokens, treats them as
 * consecutive (word, reading) pairs, and validates each pair.
 */
class convert {
public:
	/*
	 * Tokenize and validate one line in place.
	 * Returns the (even) number of token slots filled in token[];
	 * rejected pairs have both of their slots set to NULL.
	 */
	int do_line(wchar_t *buf);

	/* Token pointers into the buffer last passed to do_line(). */
	wchar_t *token[MAX_TOKENS];

protected:
	int tokenize(wchar_t *tokens[], wchar_t *in_buf);
	int check_token(wchar_t *token);
	int convert_yomi(wchar_t *yomi);
};

/*
 * Split buf into tokens and validate them pairwise: even slots are
 * checked with check_token(), odd slots with convert_yomi().
 * buf is modified (wcstok() writes terminators, convert_yomi() rewrites
 * katakana), and token[] points into it afterwards.
 *
 * Returns the number of tokens rounded down to an even value.
 */
int convert::do_line(wchar_t *buf)
{
	int num = tokenize(token, buf);

	if (num & 1) {
		/* an unpaired trailing token is reported and dropped */
		fprintf(stderr, "tokens not even (%d)\n", num);
		num &= ~1;
	}

	for (int i = 0; i < num; i += 2) {
		/* one bad half invalidates the whole pair */
		if (check_token(token[i]) || convert_yomi(token[i + 1]))
			token[i] = token[i + 1] = NULL;
	}

	return num;
}

/*
 * Split in on tab/newline into at most MAX_TOKENS tokens.
 * wcstok() treats runs of delimiters as one, so empty fields are skipped.
 *
 * Returns the number of tokens stored in tokens[]; when the limit is
 * reached a diagnostic is printed and MAX_TOKENS is returned.
 */
int convert::tokenize(wchar_t *tokens[], wchar_t *in)
{
	int i;
	wchar_t *state;

	for (i = 0; i < MAX_TOKENS; i++) {
		/* first call passes the buffer, later calls continue from state */
		tokens[i] = wcstok(i ? NULL : in, L"\t\n", &state);
		if (tokens[i] == NULL)
			return i;
	}

	fprintf(stderr, "too many tokens!\n");
	return i;
}

/*
 * Validate a reading and convert its katakana to hiragana in place.
 * Accepted characters: 'ー', hiragana 'ぁ'..'ん', katakana 'ァ'..'ヴ'.
 *
 * Returns 0 on success, -1 if the string is empty or contains any other
 * character. On failure, characters before the offending one may
 * already have been converted.
 */
int convert::convert_yomi(wchar_t *yomi)
{
	if (*yomi == L'\0')
		return -1;

	/* walk to the terminator instead of re-measuring the string each pass */
	for (wchar_t *p = yomi; *p != L'\0'; p++) {
		const bool hiragana = (*p >= L'ぁ' && *p <= L'ん');
		const bool katakana = (*p >= L'ァ' && *p <= L'ヴ');

		if (*p != L'ー' && !hiragana && !katakana)
			return -1;

		/* convert to hiragana ('ー' and 'ヴ' are left unchanged) */
		if (*p >= L'ァ' && *p < L'ヴ')
			*p -= 0x60;
	}

	return 0;
}

/*
 * Validate a word token.
 * The first character must not be 'ー' or '々'; every character must be
 * 'ー', '々', hiragana 'ぁ'..'ん', katakana 'ァ'..'ヶ', or in the range
 * U+4E00..U+9FFF.
 *
 * Returns 0 if the token is acceptable, -1 otherwise.
 */
int convert::check_token(wchar_t *token)
{
	/* check first character */
	if (*token == L'ー' || *token == L'々')
		return -1;

	/* some ideographic marks and hiragana, katakana, CJK is permitted */
	for (const wchar_t *p = token; *p != L'\0'; p++) {
		const bool mark = (*p == L'ー' || *p == L'々');
		const bool hiragana = (*p >= L'ぁ' && *p <= L'ん');
		const bool katakana = (*p >= L'ァ' && *p <= L'ヶ');
		const bool cjk = (*p >= 0x4e00 && *p <= 0x9fff);

		if (!mark && !hiragana && !katakana && !cjk)
			return -1;
	}

	return 0;
}

/*
 * Read lines from stdin, run each through convert::do_line(), and write
 * the surviving pairs to stdout as "reading/word " (a single space stands
 * in for each rejected pair), one output line per input line. Lines
 * yielding fewer than two tokens produce no output.
 *
 * Returns 0 on normal end of input, 1 if reading stdin failed.
 */
int main(int argc, char *argv[])
{
	/* both objects are large; keep them out of main's stack frame */
	static convert c;
	static wchar_t buf[CONV_BUFSIZE];
	FILE *fpi = stdin, *fpo = stdout;
	int n;

	(void)argc;
	(void)argv;

	/* wide-character input decoding depends on this locale being set */
	if (setlocale(LC_ALL, "ja_JP.UTF-8") == NULL)
		fprintf(stderr, "warning: cannot set locale ja_JP.UTF-8\n");

	while (fgetws(buf, CONV_BUFSIZE, fpi) != NULL) {
		if ((n = c.do_line(buf)) < 2)
			continue;

		for (int i = 0; i < n; i += 2) {
			if (c.token[i] == NULL) {
				fprintf(fpo, " ");
			} else {
				fprintf(fpo, "%ls/%ls ",
					c.token[i + 1], c.token[i]);
			}
		}
		fprintf(fpo, "\n");
	}

	/* distinguish a read/decoding failure from ordinary end of input */
	if (ferror(fpi)) {
		perror("fgetws");
		return 1;
	}

	return 0;
}