# Source file: mandarinanki/main.py
# Last modified: 2026-03-18 00:50:43 +08:00
# 202 lines, 6.7 KiB, Python

import re
from dataclasses import dataclass
import json
import requests
import wx
import uuid
### Dictionary stuff
# Pattern for one dictionary entry line. Capture groups, in order:
#   1. traditional form, 2. simplified form (each a run of non-whitespace),
#   3. the pinyin text found between "[" and "]",
#   4. the rest of the line, which parse_line splits on "/" into definitions.
line_regex = r"(\S+)\s(\S+)\s\[([^\]]+)\]\s(.+)"
@dataclass
class Word:
    """A single dictionary entry.

    Attributes:
        simplified: The word written in simplified characters.
        traditional: The word written in traditional characters.
        pinyin: The pronunciation exactly as it appears in the dictionary
            file (tone numbers, space-separated syllables).
        definitions: The non-empty definitions of the entry.
    """

    simplified: str
    traditional: str
    pinyin: str
    definitions: list[str]
def parse_cedict(path: str):
    """Read a CC-CEDICT file and return its entries as a list of Word.

    Lines starting with "#" (the file's header/comments) and blank lines are
    skipped; every other line is handed to parse_line.

    Args:
        path: Location of the dictionary text file.

    Returns:
        A list of Word objects in file order.
    """
    words = []
    # The dictionary is a UTF-8 file; be explicit so the platform's default
    # encoding (e.g. cp1252 on Windows) cannot garble or reject the hanzi.
    with open(path, encoding="utf-8") as file:
        for line in file:
            # Blank lines carry no entry and would make parse_line fail.
            if line.startswith("#") or not line.strip():
                continue
            words.append(parse_line(line))
    return words
def parse_line(line: str):
    """Parse one dictionary entry line into a Word.

    The line is matched against line_regex, whose groups are (in order)
    traditional, simplified, pinyin and the "/"-separated definitions.

    Args:
        line: A single entry line from the dictionary file.

    Returns:
        The Word described by the line; empty definition fragments (such as
        those produced by the leading and trailing "/") are dropped.

    Raises:
        ValueError: If the line does not have the expected entry layout.
    """
    match = re.match(line_regex, line)
    if match is None:
        # Without this check the failure surfaces as an opaque
        # "'NoneType' object has no attribute 'groups'".
        raise ValueError(f"malformed dictionary line: {line!r}")
    traditional, simplified, pinyin, definitions = match.groups()
    return Word(
        simplified,
        traditional,
        pinyin,
        [d for d in definitions.split("/") if d],
    )
def build_pinyin_index(words: list[Word]):
    """Group words by their normalized pinyin.

    Args:
        words: The dictionary entries to index.

    Returns:
        A dict mapping normalize_pinyin(word.pinyin) to the list of words
        sharing that key, in the order they appear in *words*.
    """
    index = {}
    for word in words:
        # setdefault creates the bucket on first sight of a key, replacing
        # the separate membership test and insert.
        index.setdefault(normalize_pinyin(word.pinyin), []).append(word)
    return index
def build_traditional_index(words: list[Word]):
    """Group words by their traditional-character spelling.

    Args:
        words: The dictionary entries to index.

    Returns:
        A dict mapping each traditional form to the list of words written
        that way, in the order they appear in *words*.
    """
    index = {}
    for word in words:
        # setdefault creates the bucket on first sight of a key, replacing
        # the separate membership test and insert.
        index.setdefault(word.traditional, []).append(word)
    return index
## Tone markers
# For every plain vowel: its tone-marked forms for tones 1-4 (index = tone - 1).
pinyinToneMarks = {
    'a': 'āáǎà', 'e': 'ēéěè', 'i': 'īíǐì',
    'o': 'ōóǒò', 'u': 'ūúǔù', 'ü': 'ǖǘǚǜ',
    'A': 'ĀÁǍÀ', 'E': 'ĒÉĚÈ', 'I': 'ĪÍǏÌ',
    'O': 'ŌÓǑÒ', 'U': 'ŪÚǓÙ', 'Ü': 'ǕǗǙǛ',
}

# One syllable tail: 1-3 vowels, an optional n/g/r ending, then the tone digit.
# "u:" (the dictionary's ASCII spelling of ü) and a literal ü both count as a
# vowel; "u:" is listed first so the colon is consumed together with its "u".
_PINYIN_SYLLABLE_RE = re.compile(
    r'((?:u:|[aeiouü]){1,3})(n?g?r?)([12345])',
    flags=re.IGNORECASE,
)


def convert_pinyin_callback(match):
    """Return the tone-marked replacement for one matched syllable tail.

    Args:
        match: A regex match whose groups are (vowels, ending, tone digit).

    Returns:
        The vowels with the tone mark applied, followed by the ending. For
        tone 5 (neutral) the text is returned without any mark or digit.
    """
    # Fold the ASCII "u:" spelling into the real letter so the table lookup
    # below works and no stray colon is left in the output.
    vowel = match.group(1).replace('u:', 'ü').replace('U:', 'Ü')
    ending = match.group(2)
    tone = int(match.group(3))
    # the 5th tone is neutral
    if tone == 5:
        return vowel + ending
    # for multiple vowels, use first one if it is a/e/o, otherwise use second one
    pos = 0
    if len(vowel) > 1 and vowel[0] not in 'aeoAEO':
        pos = 1
    marked = pinyinToneMarks[vowel[pos]][tone - 1]
    return vowel[:pos] + marked + vowel[pos + 1:] + ending


def convert_pinyin(s: str):
    """Convert numbered pinyin (e.g. "ni3 hao3") to tone-marked pinyin.

    Args:
        s: Text containing syllables that end in a tone digit 1-5.

    Returns:
        *s* with every recognised syllable rewritten with its tone mark;
        text that does not look like a numbered syllable is left untouched.
    """
    return _PINYIN_SYLLABLE_RE.sub(convert_pinyin_callback, s)
def normalize_pinyin(s: str):
    """Reduce pinyin to a lookup key: no tone digits, no spaces, lower case.

    The same function is applied to dictionary pinyin when the index is built
    and to the user's search text, so both sides agree on the key. Lower-casing
    lets capitalised entries (e.g. "Bei3 jing1") be found by typing "beijing".

    Args:
        s: Numbered pinyin or free-form search text.

    Returns:
        *s* with the digits 1-5 and spaces removed, converted to lower case.
    """
    return re.sub(r'[12345]', '', s).replace(' ', '').lower()
### Ankiconnect
def add_note(hanzi: str, pinyin: str, definition: str, tags: list[str],
             *, deck_name: str = "寫漢字", model_name: str = "Mandarin Custom",
             timeout: float = 10.0):
    """Send an "addNote" request to the AnkiConnect server on localhost:8765.

    Args:
        hanzi: Value for the note's "Character" field.
        pinyin: Value for the note's "Pinyin" field.
        definition: Value for the note's "Definition" field.
        tags: Tags to attach to the note.
        deck_name: Deck to add the note to. Defaults to the deck that was
            previously hard-coded.
        model_name: Note model to use. Defaults to the model that was
            previously hard-coded.
        timeout: Seconds to wait for the server before giving up, so an
            unresponsive server cannot block the caller indefinitely.

    Returns:
        The decoded JSON body of the server's response.

    Raises:
        requests.exceptions.RequestException: If the request cannot be
            completed (connection refused, timeout, ...).
    """
    request_data = {
        "version": 5,
        "action": "addNote",
        "params": {
            "note": {
                "deckName": deck_name,
                "modelName": model_name,
                "fields": {
                    "Pinyin": pinyin,
                    "Character": hanzi,
                    "Definition": definition,
                },
                "tags": tags,
            }
        },
    }
    payload = json.dumps(request_data)
    print(payload)
    r = requests.post("http://localhost:8765/", data=payload, timeout=timeout)
    # Decode the body once and reuse it for both the log line and the result.
    response = r.json()
    print(response)
    return response
### UI stuff
class MainFrame(wx.Frame):
    """Main window: search the dictionary and send chosen words to Anki.

    The window has a search box on top and two columns below it: the list of
    search results, and the details of the selected word (hanzi, pinyin, a
    checklist of definitions, a tags field and the "Add card" button).
    """

    def __init__(self):
        """Load the dictionary, build the widgets and show the window."""
        # Initialise the wx.Frame first so the object is fully constructed
        # before any instance state is attached to it.
        super().__init__(parent=None, title='Mandarin Anki', size=(460, 400))

        # set up the dictionary
        dictionary = parse_cedict("./cedict_1_0_ts_utf-8_mdbg.txt")
        self.pinyin_idx = build_pinyin_index(dictionary)
        self.traditional_idx = build_traditional_index(dictionary)

        # Kept from the original code; nothing in this class reads it.
        self.selected_words = []
        # Word entries currently listed in the results box, in display order.
        self.result_words = []
        # Word whose details are shown; None until the user picks a result.
        # OnAddCard reads this, so it must exist before any selection is made.
        self.selected_word = None

        panel = wx.Panel(self)
        vertical_layout = wx.BoxSizer(wx.VERTICAL)
        results_vertical_layout = wx.BoxSizer(wx.VERTICAL)
        columns_layout = wx.BoxSizer(wx.HORIZONTAL)

        # search and results
        search_label = wx.StaticText(panel, label="Search:")
        self.search = wx.TextCtrl(panel, size=(200, -1))
        self.search.Bind(wx.EVT_TEXT, self.OnKeyTyped)
        self.results = wx.ListBox(panel, size=(200, 200))
        self.results.Bind(wx.EVT_LISTBOX, self.OnWordSelected)

        # details of the selected word
        self.selected_hanzi = wx.StaticText(panel, label="")
        self.selected_pinyin = wx.StaticText(panel, label="")
        self.selected_definitions = wx.CheckListBox(panel, size=(200, 200))
        tags_label = wx.StaticText(panel, label="Tags (comma separated):")
        self.tags = wx.TextCtrl(panel, size=(200, -1))
        results_vertical_layout.Add(self.selected_hanzi, 0)
        results_vertical_layout.Add(self.selected_pinyin, 0)
        results_vertical_layout.Add(self.selected_definitions, 0)
        results_vertical_layout.Add(tags_label)
        results_vertical_layout.Add(self.tags, 0)
        self.add_card = wx.Button(panel, label="Add card")
        self.add_card.Bind(wx.EVT_BUTTON, self.OnAddCard)
        results_vertical_layout.Add(self.add_card, 0)

        vertical_layout.Add(search_label, 0)
        vertical_layout.Add(self.search, 0)
        vertical_layout.Add(columns_layout)
        columns_layout.Add(self.results, 0, wx.EXPAND)
        columns_layout.Add(results_vertical_layout, 0, wx.EXPAND)
        panel.SetSizer(vertical_layout)
        self.Show()

    @staticmethod
    def _result_label(word):
        """Return the one-line text shown for *word* in the results list."""
        # Guard against an entry without definitions instead of raising IndexError.
        first_definition = word.definitions[0] if word.definitions else ""
        return f"{word.traditional} {convert_pinyin(word.pinyin)} {first_definition}"

    def _show_results(self, matches):
        """Display *matches* in the results list and remember them for selection."""
        self.results.Set([self._result_label(w) for w in matches])
        self.result_words = matches

    def OnKeyTyped(self, event):
        """Refresh the results list whenever the search text changes.

        The text is looked up first as pinyin (via normalize_pinyin) and, if
        that finds nothing, as an exact traditional-character spelling. An
        empty or unmatched search clears the list.
        """
        query = event.GetString()
        matches = []
        if query:
            pinyin_key = normalize_pinyin(query)
            if pinyin_key in self.pinyin_idx:
                matches = self.pinyin_idx[pinyin_key]
            elif query in self.traditional_idx:
                matches = self.traditional_idx[query]
        self._show_results(matches)

    def OnWordSelected(self, event):
        """Show the details of the result the user clicked."""
        selection = event.GetEventObject().GetSelection()
        # wx.NOT_FOUND (-1) means "nothing selected"; used as a list index it
        # would silently pick the last result, so bail out instead.
        if not 0 <= selection < len(self.result_words):
            return
        word = self.result_words[selection]
        self.selected_word = word
        self.selected_hanzi.SetLabel(word.traditional)
        self.selected_pinyin.SetLabel(convert_pinyin(word.pinyin))
        # Set() replaces the whole content and also accepts an empty list.
        self.selected_definitions.Set(word.definitions)

    def OnAddCard(self, event):
        """Send the selected word with its checked definitions to Anki.

        Does nothing when no word is selected or no definition is checked.
        """
        if self.selected_word is None or not self.selected_definitions.GetCheckedItems():
            return
        print("adding note " + self.selected_word.traditional)
        # Drop empty pieces so a blank tags box or a trailing comma does not
        # produce an empty-string tag.
        tags = [tag.strip() for tag in self.tags.GetValue().split(",") if tag.strip()]
        add_note(
            self.selected_word.traditional,
            convert_pinyin(self.selected_word.pinyin),
            ", ".join(self.selected_definitions.GetCheckedStrings()),
            tags,
        )
if __name__ == '__main__':
    # Create the wx application object, build the main window (which shows
    # itself at the end of its constructor), then hand control to the event loop.
    app = wx.App()
    frame = MainFrame()
    app.MainLoop()