// SPDX-License-Identifier: MIT
// SPDX-FileCopyrightText: 2023 SASANO Takayoshi

#include <inttypes.h>
#include <locale.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <wchar.h>

#include <map>
#include <string>

/* One stored record: the key text together with its accumulated value. */
template <typename T>
struct data_entry {
    std::wstring key;
    T value;
};

/*
 * Ordered key -> entry store that accumulates values per key.
 *
 * The diagnostic in set() formats values with PRId64, so T is expected to
 * be int64_t (which is how main() instantiates it).
 */
template <typename T>
class data_store {
public:
    std::map<std::wstring, data_entry<T>> db;

    /*
     * Record `value` under `key`.  A new key is inserted as-is; for an
     * existing key the value is added to the stored one and the
     * old -> new transition is reported on stderr.
     */
    void set(wchar_t *key, T value)
    {
        data_entry<T> d;
        d.key = key;
        d.value = value;

        /* single lookup: reuse the iterator instead of indexing db again */
        auto [pos, inserted] = db.try_emplace(d.key, d);
        if (!inserted) {
            pos->second.value += value;
            fprintf(stderr, "update \"%ls\": %" PRId64 " -> %" PRId64 "\n",
                    key, pos->second.value - value, pos->second.value);
        }
    }

    /* Drop every stored entry. */
    void init(void)
    {
        db.clear();
    }
};

#define CONV_BUFSIZE 65536

/* Count how many times `c` occurs in the NUL-terminated wide string `buf`. */
static int64_t count_char(wchar_t *buf, wchar_t c)
{
    int64_t count = 0;

    for (; *buf; buf++) {
        if (*buf == c)
            count++;
    }

    return count;
}

/*
 * Split one input line of the form "<key>\t<number>" in place.
 *
 * The last tab in `buf` is overwritten with a terminator so that `buf`
 * holds only the key afterwards.  Returns the number following the tab
 * (parsed by wcstoll with base 0), or -1 when the line has no tab, the key
 * contains the filtered substring, or the key does not contain exactly two
 * spaces.
 */
static int64_t do_line(wchar_t *buf)
{
    wchar_t *p = wcsrchr(buf, L'\t');

    if (p == NULL)
        return -1;
    *p = L'\0';

    /*
     * NOTE(review): the needle below is an empty string.  wcsstr() with an
     * empty needle always returns the haystack, so as written this test
     * rejects every line.  The intended token was presumably lost (it may
     * have been an angle-bracketed marker) -- restore it from the upstream
     * source before relying on this filter.
     */
    if (wcsstr(buf, L"") != NULL || count_char(buf, L' ') != 2)
        return -1;

    return wcstoll(p + 1, NULL, 0);
}

/*
 * Read the next line from `fp` into `buf` (at most n - 1 wide characters).
 *
 * When fgetws() fails, the rest of the current line is discarded byte by
 * byte, the skipped span is reported on stderr and reading continues with
 * the following line.  Returns `buf` on success and NULL once the end of
 * input has been reached.
 */
static wchar_t *read_line(wchar_t *buf, int n, FILE *fp)
{
    while (1) {
        wchar_t *p = fgetws(buf, n, fp);
        if (p != NULL)
            return p;

        /* garbage detected, seek next line */
        long pos = ftell(fp);
        int64_t len = 0;
        int c;

        while ((c = fgetc(fp)) != EOF && c != '\n')
            len++;

        /*
         * fgetws() also returns NULL at a plain end of input; in that case
         * nothing was skipped, so there is no garbage to report.
         */
        if (c == EOF && len == 0)
            break;

        fprintf(stderr, "garbage at %" PRId64 ", %" PRId64 " bytes\n",
                static_cast<int64_t>(pos), len);
        if (c == EOF)
            break;
    }

    return NULL;
}

/*
 * Usage: prog -i infile -o outfile [-l limit]   ("-" selects stdin/stdout)
 *
 * Pass 1 reads every line, keeps those accepted by do_line() with a
 * positive count and sums the counts per key.  Pass 2 writes each key whose
 * total is at least `limit` as "key\tcount", in key order.
 * Exits with a non-zero status on usage or file-open errors.
 */
int main(int argc, char *argv[])
{
    char *infile = NULL, *outfile = NULL;
    FILE *fpi, *fpo;
    int64_t limit = 0;
    int ch;
    data_store<int64_t> d;
    static wchar_t buf[CONV_BUFSIZE];   /* kept off the stack: 64Ki wide chars */

    while ((ch = getopt(argc, argv, "l:i:o:")) != -1) {
        switch (ch) {
        case 'l':
            limit = atoll(optarg);
            break;
        case 'i':
            infile = optarg;
            break;
        case 'o':
            outfile = optarg;
            break;
        }
    }

    if (infile == NULL || outfile == NULL) {
        fprintf(stderr, "usage: %s -i [infile] -o [outfile] "
                "-l [limit]\n", argv[0]);
        return EXIT_FAILURE;
    }

    /* the wide-character input functions decode according to this locale */
    if (setlocale(LC_ALL, "ja_JP.UTF-8") == NULL)
        fprintf(stderr, "warning: locale ja_JP.UTF-8 is not available\n");

    fpi = !strcmp(infile, "-") ? stdin : fopen(infile, "r");
    if (fpi == NULL) {
        fprintf(stderr, "file open error (infile)\n");
        return EXIT_FAILURE;
    }

    fpo = !strcmp(outfile, "-") ? stdout : fopen(outfile, "w");
    if (fpo == NULL) {
        fprintf(stderr, "file open error (outfile)\n");
        fclose(fpi);
        return EXIT_FAILURE;
    }

    /* pass 1 */
    d.init();
    while (read_line(buf, CONV_BUFSIZE, fpi) != NULL) {
        int64_t c = do_line(buf);

        if (c <= 0)
            continue;
        d.set(buf, c);
    }

    /* pass 2 */
    for (const auto &kv : d.db) {
        const data_entry<int64_t> &entry = kv.second;

        if (entry.value < limit)
            continue;
        fprintf(fpo, "%ls\t%" PRId64 "\n", entry.key.c_str(), entry.value);
    }

    fclose(fpo);
    fclose(fpi);
    return EXIT_SUCCESS;
}