diff options
| author | ilotterytea <iltsu@alright.party> | 2025-07-05 10:01:29 +0500 |
|---|---|---|
| committer | ilotterytea <iltsu@alright.party> | 2025-07-05 10:01:29 +0500 |
| commit | 53545015c2ab7fd6bed82e9fcb3261e80dab61a3 (patch) | |
| tree | 662b3c7d5bcce292e8de9f181e71c586d82d6235 | |
| parent | af1648420b1a343aa95d3cd3abe89c5283c7aaba (diff) | |
| -rwxr-xr-x | train.sh | 61 |
1 files changed, 61 insertions, 0 deletions
#!/bin/bash
#
# train.sh - Markov chain generator.
#
# Reads a text file, counts how often every whitespace-separated word is
# directly followed by another word (the chain continues across line
# boundaries), and writes one SQL INSERT statement per observed
# (from_word, to_word) pair into output.sql in the current directory.
#
# Usage: train.sh filename.txt
#
# Requires bash 4.0+ (associative arrays).

readonly OUTPUT_FILE="output.sql"

# Separator between the two words of a pair inside an associative-array key.
# Words are produced by splitting on spaces/tabs/newlines, so a tab can never
# be part of a word. (A comma can: "hello," is a perfectly normal token.)
readonly PAIR_SEP=$'\t'

#######################################
# Print an error message to stderr and abort the script.
# Arguments: $* - message
#######################################
die() {
  printf 'error: %s\n' "$*" >&2
  exit 1
}

#######################################
# Escape a string for use inside a single-quoted SQL literal and store the
# result in a variable, without forking a subshell.
# Single quotes are doubled and backslashes are doubled.
# Arguments: $1 - name of the variable that receives the result
#            $2 - raw string
#######################################
escape_sql_into() {
  local __s=$2
  local __q="'"
  # The quote character goes through a variable so the substitution behaves
  # the same on every bash version, regardless of how quote removal is applied
  # to the replacement string.
  __s=${__s//$__q/$__q$__q}
  __s="${__s//\\/\\\\}"
  printf -v "$1" '%s' "$__s"
}

#######################################
# Escape a string for use inside a single-quoted SQL literal.
# Arguments: $1 - raw string
# Outputs:   the escaped string on stdout, followed by a newline
#######################################
escape_sql() {
  local escaped
  escape_sql_into escaped "$1"
  # printf rather than echo: echo would swallow values such as "-n" or "-e".
  printf '%s\n' "$escaped"
}

#######################################
# Entry point.
# Arguments: $1 - path of the text file to train on
# Outputs:   progress messages on stdout; SQL statements in $OUTPUT_FILE
# Returns:   exits 1 on usage or I/O errors
#######################################
main() {
  if [[ -z "${1:-}" ]]; then
    echo "Markov chain generator."
    echo "Usage: $0 filename.txt"
    exit 1
  fi

  local origin_name=$1
  if [[ ! -f "$origin_name" || ! -r "$origin_name" ]]; then
    die "cannot read input file '$origin_name'"
  fi

  # Feed the file on stdin so wc prints the number only; some wc
  # implementations pad it with blanks, so strip all whitespace.
  local line_count
  line_count=$(wc -l < "$origin_name") \
    || die "cannot count lines in '$origin_name'"
  line_count=${line_count//[[:space:]]/}

  # wc -l counts newline characters; a final line without a trailing newline
  # is still parsed below, so include it in the progress total.
  if [[ -s "$origin_name" && -n "$(tail -c 1 -- "$origin_name")" ]]; then
    line_count=$((line_count + 1))
  fi

  local -A counts=()
  local -a words=()
  local current_line=0
  local prev_word=""
  local word key

  # read -a splits the line into words without pathname expansion, so tokens
  # such as "*" stay literal. The "|| ..." part keeps a last line that has no
  # trailing newline.
  while IFS=$' \t\n' read -r -a words || ((${#words[@]} > 0)); do
    current_line=$((current_line + 1))
    for word in "${words[@]}"; do
      if [[ -n "$prev_word" ]]; then
        key="${prev_word}${PAIR_SEP}${word}"
        # Read the old value through ${...} instead of naming the array
        # element inside $(( )): the arithmetic evaluator would expand the
        # subscript a second time and execute any $(...) found in a word.
        counts[$key]=$((${counts[$key]:-0} + 1))
      fi
      prev_word=$word
    done
    echo "Parsing... ($current_line/$line_count)"
  done < "$origin_name"

  # Truncate (or create) the output file up front so an unwritable location is
  # reported before any packing work is done.
  : > "$OUTPUT_FILE" || die "cannot write to '$OUTPUT_FILE'"

  echo "Packing up..."

  # The origin name is the same for every row; escape it once.
  local ename efrom eto
  escape_sql_into ename "$origin_name"

  current_line=0
  line_count=${#counts[@]}

  # The output file is opened once on fd 3 for the whole loop instead of being
  # reopened for every row; progress messages keep going to stdout.
  for key in "${!counts[@]}"; do
    current_line=$((current_line + 1))
    escape_sql_into efrom "${key%%"$PAIR_SEP"*}"
    escape_sql_into eto "${key#*"$PAIR_SEP"}"
    printf "INSERT INTO chains(origin_name, from_word, to_word, weight) VALUES ('%s', '%s', '%s', %s);\n" \
      "$ename" "$efrom" "$eto" "${counts[$key]}" >&3
    echo "Packing up... ($current_line/$line_count)"
  done 3>> "$OUTPUT_FILE"

  echo "Done! Markov chain output saved to '$OUTPUT_FILE'! Feed your database with it."
}

main "$@"
