diff --git a/hanzi-flash/README.md b/hanzi-flash/README.md
index b43682b..17e4da6 100644
--- a/hanzi-flash/README.md
+++ b/hanzi-flash/README.md
@@ -2,4 +2,9 @@
 
 Generate flashcards for a range of frequent hanzi characters.
 
-You need the [hanziDB CSV](https://github.com/ruddfawcett/hanziDB.csv) file for this to work.
+There are multiple sources for word lists:
+
+- [hanziDB CSV](https://github.com/ruddfawcett/hanziDB.csv) (purely frequency-based)
+- [hsk CSV](https://github.com/plaktos/hsk_csv) (common usage words, graded by difficulty)
+
+Keep the filenames as `hanzi_db.csv` and `hsk*.csv`, because the script recognizes the input format from the filename.
diff --git a/hanzi-flash/hanzi_flash.py b/hanzi-flash/hanzi_flash.py
index 62e5d59..be34340 100644
--- a/hanzi-flash/hanzi_flash.py
+++ b/hanzi-flash/hanzi_flash.py
@@ -17,9 +17,15 @@ parser.add_argument("-O", "--output", default="hanzi_flash.csv", type=Path)
 parser.add_argument("-i", "--input", default="hanzi_db.csv", type=Path)
 args = parser.parse_args()
 
+offset = 1
+
+fname: str = args.input.stem
+if fname.startswith("hsk"):
+    offset = 0
+
 with open(args.input) as csv_file:
     reader = csv.reader(csv_file)
     with open(args.output, "w") as outp_file:
         writer = csv.writer(outp_file)
         for row in itertools.islice(reader, args.start, args.end + 1):
-            writer.writerow([row[1], f"{row[2]} ({row[3]})"])
+            writer.writerow([row[offset], f"{row[offset+1]} ({row[offset+2]})"])
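
For illustration, below is a small standalone sketch of the detection and card-building logic the patch adds. The example filenames and rows are hypothetical, and the column meanings (character, pinyin, definition) are assumptions inferred from the offsets rather than taken from the actual CSV headers.

```python
from pathlib import Path


def column_offset(input_path: Path) -> int:
    # Mirrors hanzi_flash.py: a filename stem starting with "hsk" selects the
    # HSK layout (fields start at column 0); anything else is treated like
    # hanzi_db.csv, whose useful fields are assumed to start at column 1.
    return 0 if input_path.stem.startswith("hsk") else 1


def to_card(row: list[str], offset: int) -> list[str]:
    # Mirrors the writerow() payload: front = headword, back = "reading (gloss)".
    return [row[offset], f"{row[offset + 1]} ({row[offset + 2]})"]


# Hypothetical filenames and rows; the real hanzi_db.csv / hsk*.csv columns may differ.
assert column_offset(Path("hanzi_db.csv")) == 1
assert column_offset(Path("hsk3.csv")) == 0
assert column_offset(Path("level4.csv")) == 1  # a renamed HSK file would get the wrong offset

assert to_card(["1", "的", "de", "possessive particle"], offset=1) == ["的", "de (possessive particle)"]
assert to_card(["爱", "ài", "to love"], offset=0) == ["爱", "ài (to love)"]
```

The last `column_offset` check is why the README warns about filenames: detection relies solely on the `hsk` prefix of the input file's stem, so a renamed HSK list would silently be read with the wrong column offset.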