Compare commits


No commits in common. "46823e7bce473fc37260c008307e1be407ae5151" and "ea13365bdc4664f78f014aeebda3be05609b3349" have entirely different histories.

4 changed files with 18 additions and 200 deletions

View File

@@ -1,22 +0,0 @@
# day-inator
Script to practice the [Doomsday algorithm](https://www.timeanddate.com/date/doomsday-rule.html).
You are given a random date and asked to determine its weekday.
See the script source code for more information.
Example usage:
```
$ python day_inator.py
Day-Inator 2000
---------------
Use 1-7 for Mon-Sun. See script docstring for more information.
2037-11-18
Guess: 3
Correct.
Took 22 seconds with 1 try.
```
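To sanity-check the Doomsday reasoning by hand, here is a minimal sketch (not part of this repo) that prints a year's anchor day using only the standard library:
```
import datetime

def doomsday_anchor(year: int) -> int:
    """Weekday (0=Mon .. 6=Sun) shared by the year's doomsday dates."""
    # 4/4 is always a doomsday, so its weekday is the year's anchor.
    return datetime.date(year, 4, 4).weekday()

# 2037's anchor is Saturday (5); 11/7 is a doomsday, so 2037-11-18 is Sat + 4 = Wednesday,
# matching the guess of 3 (Wed) in the example above.
print(doomsday_anchor(2037))
```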

View File

@@ -1,63 +0,0 @@
#!/usr/bin/env python
"""Guess the weekday of a random date.
For more information, search 'Doomsday algorithm'.
You may pass a date argument as YYYY-MM-DD (leading zeroes required).
Otherwise, a random date is generated.
"""
import datetime
import random
import sys
epsilon: int = 365 * 200
"""Maximum variation in days for the date compared to today."""
r = random.randint(-epsilon, epsilon)
"""Actual variation in days for the date compared to today."""
d: datetime.date = datetime.date.today() + datetime.timedelta(days=r)
"""Date to guess."""
if len(sys.argv) > 1:
    d = datetime.date.fromisoformat(sys.argv[1])
def guess(d: datetime.date) -> bool:
    """Prompt for a guess.

    Returns True if the guess is correct.
    """
    try:
        # Accept only 1-7 (Mon-Sun); anything else is rejected rather than wrapped by % 7.
        ans: int = int(input("Guess: ")) - 1
        if ans not in range(7):
            raise ValueError
    except ValueError:
        print("Invalid input.")
    else:
        if ans == d.weekday():
            return True
    return False
print("""
Day-Inator 2000
---------------
Use 1-7 for Mon-Sun. See script docstring for more information.
""")
print(d)
start = datetime.datetime.now()
fails: int = 0
while not guess(d):
    print("Wrong.")
    fails += 1
print("Correct.")
dur = (datetime.datetime.now() - start).seconds
print(f"Took {dur} seconds with {fails + 1} tr{'ies' if fails else 'y'}.")

View File

@@ -1,74 +1,10 @@
# hanzi-flash
Generate flashcards in CSV format for a range of frequent hanzi words.
Generate flashcards for a range of frequent hanzi characters.
This is based on the
[hsk CSV](https://github.com/plaktos/hsk_csv)
repo, which contains common usage words graded by difficulty.
These form the vocabulary of the HSK (the standardized Chinese proficiency exam).
There are multiple sources for words:
- [hanziDB CSV](https://github.com/ruddfawcett/hanziDB.csv) (purely frequency based)
- [hsk CSV](https://github.com/plaktos/hsk_csv) (common usage words, graded by difficulty)
## usage
This script requires the HSK vocabulary in a CSV file.
The expected format is word, pronunciation in pinyin, and definition.
You may combine all levels into a single file as such:
```
git clone https://github.com/plaktos/hsk_csv
cd hsk_csv
cat hsk*.csv > all_hsk.csv
```
To use the script, put this `all_hsk.csv` file in the same directory, or pass the path explicitly with the `-i/--input` flag.
CSV output goes to stdout, which can be redirected to a file.
For example, this generates a flashcard deck for the entire HSK vocabulary:
```
python hanzi_flash.py -i ./all_hsk.csv > output.csv
```
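Each input row becomes a two-column flashcard row: the word on the front, the pinyin plus definition on the back. A minimal sketch with a made-up row, mirroring the script's `writer.writerow` call:
```
import csv
import sys

word, pron, mean = "你好", "nǐ hǎo", "hello"  # made-up CSV row
csv.writer(sys.stdout).writerow([word, f"{pron} ({mean})"])
# prints: 你好,nǐ hǎo (hello)
```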
## ranges
HSK's 6 levels have increasingly large vocabularies.
This script can help you divide them into more digestible chunks.
Specify the `-s/--start` and `-e/--end` options to only output a range of characters.
For example, the first 50 characters:
```
python hanzi_flash.py -s 1 -e 50
```
Or, the next 50:
```
python hanzi_flash.py -s 51 -e 100
```
Once generated, import both decks and use your flashcard app's merge feature to combine them.
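Under the hood, the range is just a slice over the CSV rows; a rough sketch of the 1-based, inclusive slicing, using stand-in rows rather than the real CSV:
```
import itertools

rows = (f"row {i}" for i in range(1, 151))  # stand-in for the CSV reader
start, end = 51, 100                        # as in -s 51 -e 100
chunk = list(itertools.islice(rows, start - 1, end))
print(chunk[0], chunk[-1], len(chunk))      # row 51 row 100 50
```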
## single character mode
HSK's vocabulary is given as words, not as individual characters.
Pass the `-S/--single` flag to break up the words into characters.
The flashcard will have a single character, and the answer will be its pronunciations and example words containing it.
This is intended as a supplement to the regular word flashcard decks.
Single mode respects the range options above,
and only outputs new, unique characters
that first appear in the given range.
It will also not duplicate flashcards for words that are single characters.
For example, take the following invocations, with and without single mode:
```
$ python hanzi_flash.py -s 17 -e 19
电脑,diàn nǎo (computer)
电视,diàn shì (television)
电影,diàn yǐng (movie)
$ python hanzi_flash.py -s 17 -e 19 --single
脑,nǎo / 电脑
视,shì / 电视
影,yǐng / 电影
```
Single mode only picks out the new characters (电 was learned before the given range `17-19`).
Be careful to keep the filenames as `hanzi_db.csv` and `hsk*.csv`, since the script tells the sources apart by filename.

View File

@@ -2,64 +2,31 @@
"""
Generate flashcards for a range of frequent hanzi characters.
See attached README for more information.
Based on https://github.com/ruddfawcett/hanziDB.csv
"""
import csv
import itertools
import argparse
import sys
import re
from pathlib import Path
parser = argparse.ArgumentParser()
parser.add_argument("-s", "--start", default=1, type=int)
parser.add_argument("-s", "--start", default=0, type=int)
parser.add_argument("-e", "--end", default=99999999, type=int)
parser.add_argument("-i", "--input", default="all_hsk.csv", type=Path)
parser.add_argument(
    "-S",
    "--single",
    action="store_true",
    help="Output unique single characters instead of words.",
)
parser.add_argument("-O", "--output", default="hanzi_flash.csv", type=Path)
parser.add_argument("-i", "--input", default="hanzi_db.csv", type=Path)
args = parser.parse_args()
prev: set[str] = set()
"""Characters from previous single character card decks."""
offset = 1
single: set[str] = set()
"""Already single characters."""
uniq: dict[str, set[str]] = {}
"""Character to words mapping."""
prons: dict[str, set[str]] = {}
"""Character to pronunciations mapping."""
fname: str = args.input.stem
if fname.startswith("hsk"):
    offset = 0
with open(args.input) as csv_file:
    reader = csv.reader(csv_file)
    writer = csv.writer(sys.stdout)
    start = 0 if args.single else args.start - 1
    for i, row in enumerate(itertools.islice(reader, start, args.end)):
        word, pron, mean = row[:3]
        if args.single:
            if len(word) > 1:
                for sound, char in zip(pron.lower().split(), word):
                    if i < args.start - 1:
                        prev.add(char)
                    elif char not in prev:
                        if char not in uniq:
                            uniq[char] = set()
                            prons[char] = set()
                        uniq[char].add(word)
                        prons[char].add(sound)
            else:
                single.add(word[0])
        else:
            writer.writerow([word, f"{pron} ({mean})"])

if args.single:
    for char in uniq:
        if char not in single:
            writer.writerow(
                [char, f"{', '.join(prons[char])} / {' '.join(uniq[char])}"]
            )
    with open(args.output, "w") as outp_file:
        writer = csv.writer(outp_file)
        for row in itertools.islice(reader, args.start, args.end + 1):
            writer.writerow([row[offset], f"{row[offset+1]} ({row[offset+2]})"])