Files
mandarinanki/main.py
2025-08-30 14:46:59 -07:00

186 lines
5.8 KiB
Python

import html
import re
import uuid
from dataclasses import dataclass

import genanki
import wx
### Dictionary stuff
# Pattern for one dictionary entry line, used by parse_line.
# Capture groups, in order:
#   1. first whitespace-delimited token  (unpacked as the traditional form)
#   2. second whitespace-delimited token (unpacked as the simplified form)
#   3. the text between the square brackets (unpacked as the pinyin)
#   4. the rest of the line (unpacked as the "/"-separated definitions)
line_regex = r"(\S+)\s(\S+)\s\[([^\]]+)\]\s(.+)"
@dataclass
class Word:
    """A single dictionary entry.

    Holds both written forms of the headword, its pinyin reading and the
    list of its definitions.
    """

    # Headword written in simplified characters.
    simplified: str
    # Headword written in traditional characters.
    traditional: str
    # Pinyin reading exactly as read from the dictionary line; tone marks
    # are only applied later, at display time.
    pinyin: str
    # Definitions of the entry, one string per definition.
    definitions: list[str]
def parse_cedict(path: str) -> "list[Word]":
    """Load every entry of a dictionary file.

    Args:
        path: Location of the dictionary text file.

    Returns:
        One ``Word`` per entry line, in file order.  Lines starting with
        ``#`` and blank lines are skipped.

    Raises:
        OSError: If the file cannot be opened.
    """
    words = []
    # The dictionary is UTF-8 text; decoding it explicitly avoids depending on
    # the platform's default encoding.  "utf-8-sig" additionally swallows a
    # leading byte-order mark so the first "#" line is still recognised.
    with open(path, encoding="utf-8-sig") as file:
        for line in file:
            # Comment/metadata lines and empty lines carry no entry.
            if line.startswith("#") or not line.strip():
                continue
            words.append(parse_line(line))
    return words
def parse_line(line: str) -> "Word":
    """Parse one dictionary entry line into a ``Word``.

    Args:
        line: A line of the form
            ``traditional simplified [pinyin] /definition/definition/``.

    Returns:
        The parsed entry.  Empty segments produced by the leading and
        trailing ``/`` are dropped from the definitions.

    Raises:
        ValueError: If the line does not match ``line_regex``.
    """
    match = re.match(line_regex, line)
    if match is None:
        # Without this check a malformed line surfaces as an opaque
        # AttributeError raised from ``None.groups()``.
        raise ValueError(f"not a valid dictionary entry: {line!r}")
    traditional, simplified, pinyin, definitions = match.groups()
    return Word(
        simplified,
        traditional,
        pinyin,
        [d for d in definitions.split("/") if d],
    )
def build_pinyin_index(words: "list[Word]"):
    """Group words by their pinyin reading.

    Returns a dict mapping each distinct ``pinyin`` string to the list of
    words carrying it, in the order they appear in ``words``.
    """
    by_pinyin = {}
    for entry in words:
        # setdefault opens a new bucket the first time a reading is seen.
        by_pinyin.setdefault(entry.pinyin, []).append(entry)
    return by_pinyin
def build_traditional_index(words: "list[Word]"):
    """Group words by their traditional-character form.

    Returns a dict mapping each distinct ``traditional`` string to the list
    of words written that way, in the order they appear in ``words``.
    """
    by_headword = {}
    for entry in words:
        bucket = by_headword.get(entry.traditional)
        if bucket is None:
            # First word with this headword: start its bucket.
            by_headword[entry.traditional] = [entry]
        else:
            bucket.append(entry)
    return by_headword
## Tone markers
# Tone-marked forms of each vowel, indexed by (tone number - 1) for tones
# 1-4.  The neutral tone (5) is written without a mark and has no entry here.
pinyinToneMarks = {
    'a': 'āáǎà', 'e': 'ēéěè', 'i': 'īíǐì',
    'o': 'ōóǒò', 'u': 'ūúǔù', 'ü': 'ǖǘǚǜ',
    'A': 'ĀÁǍÀ', 'E': 'ĒÉĚÈ', 'I': 'ĪÍǏÌ',
    'O': 'ŌÓǑÒ', 'U': 'ŪÚǓÙ', 'Ü': 'ǕǗǙǛ',
}


def convert_pinyin_callback(match):
    """Return the tone-marked spelling for one numbered-syllable match.

    Args:
        match: Regex match with three groups: the vowel cluster, the
            optional n/g/r coda, and the tone digit (1-5).

    Returns:
        The vowel cluster with the tone mark placed on the right vowel,
        followed by the coda; the tone digit is dropped.
    """
    tone = int(match.group(3))
    vowel = match.group(1)
    coda = match.group(2)
    if tone == 5:
        # Neutral tone: no mark exists, so only the digit is removed.
        # (Indexing the 4-entry tone table with tone 5 would raise IndexError.)
        return vowel + coda
    # for multiple vowels, use first one if it is a/e/o, otherwise use second one
    pos = 0
    if len(vowel) > 1 and vowel[0] not in 'aeoAEO':
        pos = 1
    marked = pinyinToneMarks[vowel[pos]][tone - 1]
    return vowel[:pos] + marked + vowel[pos + 1:] + coda


def convert_pinyin(s):
    """Convert numbered pinyin (``"ni3 hao3"``) to tone-marked pinyin.

    Args:
        s: Text containing syllables that end in a tone digit 1-5.  The
            ASCII spelling ``u:`` is accepted for ``ü``.

    Returns:
        ``s`` with every numbered syllable rewritten with tone marks; text
        that is not a numbered syllable is left untouched.
    """
    # Normalise the ASCII "u:" spelling first so the syllable pattern below
    # sees a real ü and can put the mark on it.
    s = s.replace('u:', 'ü').replace('U:', 'Ü')
    return re.sub(r'([aeiouü]{1,3})(n?g?r?)([12345])', convert_pinyin_callback, s, flags=re.IGNORECASE)
### Anki stuff
# Note model used by generate_card for every note: two fields
# (Question, Answer) and a single card template.
simple_anki_model = genanki.Model(
    # NOTE(review): presumably the model's numeric ID — confirm against genanki.
    1607392319,
    'Simple Model',
    fields=[
        {'name': 'Question'},
        {'name': 'Answer'},
    ],
    templates=[
        {
            'name': 'Card 1',
            # Question side: the Question field, centred, at 1.4rem.
            'qfmt': '<div style="text-align:center;font-size: 1.4rem">{{Question}}</div>',
            # Answer side: the front side again, a rule, then the Answer field.
            'afmt': '<div style="text-align:center">{{FrontSide}}<hr id="answer">{{Answer}}</div>',
        },
    ])
def generate_card_from_word(word: "Word"):
    """Build the note for a dictionary word.

    The front shows the traditional characters; the back shows the
    tone-marked pinyin, a line break, and the first definition.

    Args:
        word: The dictionary entry to turn into a card.

    Returns:
        The note produced by ``generate_card``.
    """
    # The back is assembled as HTML (note the <br>), so the free-text
    # definition is escaped to keep "<", ">" and "&" from being read as
    # markup.  An entry without definitions gets an empty definition line
    # instead of raising IndexError.
    definition = html.escape(word.definitions[0], quote=False) if word.definitions else ""
    return generate_card(word.traditional, f"{convert_pinyin(word.pinyin)}<br>{definition}")
def generate_card(front: str, back: str):
    """Create a note of ``simple_anki_model``.

    ``front`` is passed as the first field value and ``back`` as the second,
    matching the model's Question/Answer field order.
    """
    field_values = [front, back]
    return genanki.Note(model=simple_anki_model, fields=field_values)
def generate_deck(cards: list, name: str, deck_id: int = 2059400110):
    """Collect notes into a named deck.

    Args:
        cards: Notes to add to the deck, in order.
        name: Name given to the deck.
        deck_id: Numeric identifier passed to ``genanki.Deck``.  The default
            is the value this function previously hard-coded, so existing
            callers are unaffected.

    Returns:
        The populated ``genanki.Deck``.
    """
    # NOTE(review): every deck built with the default shares one ID; callers
    # producing several different decks probably want to pass their own —
    # confirm against the genanki documentation.
    deck = genanki.Deck(deck_id, name)
    for card in cards:
        deck.add_note(card)
    return deck
### UI stuff
class MainFrame(wx.Frame):
    """Main window: search the dictionary, pick words, export a deck.

    Layout, top to bottom: the search box, two list boxes side by side
    (search results first, chosen words second), the deck-name box and the
    export button.
    """

    def __init__(self):
        # Create the underlying frame before attaching any state to it.
        super().__init__(parent=None, title='Mandarin Anki', size=(420, 340))

        # set up the dictionary
        dictionary = parse_cedict("./cedict_1_0_ts_utf-8_mdbg.txt")
        self.pinyin_idx = build_pinyin_index(dictionary)
        self.traditional_idx = build_traditional_index(dictionary)
        # Words chosen for the deck; kept parallel to the rows of self.selected.
        self.selected_words = []
        # Words currently listed; kept parallel to the rows of self.results.
        self.result_words = []

        panel = wx.Panel(self)
        vertical_layout = wx.BoxSizer(wx.VERTICAL)
        columns_layout = wx.BoxSizer(wx.HORIZONTAL)

        # search and results
        self.search = wx.TextCtrl(panel, size=(200, -1))
        self.search.Bind(wx.EVT_TEXT, self.OnKeyTyped)
        self.results = wx.ListBox(panel, size=(200, 200))
        self.results.Bind(wx.EVT_LISTBOX, self.OnWordSelected)
        self.selected = wx.ListBox(panel, size=(200, 200))
        self.selected.Bind(wx.EVT_LISTBOX, self.OnWordRemoved)

        # deck name and export button
        self.deck_name = wx.TextCtrl(panel, size=(200, -1))
        self.create_deck = wx.Button(panel, label="ank!")
        self.create_deck.Bind(wx.EVT_BUTTON, self.OnAnk)

        vertical_layout.Add(self.search, 0)
        vertical_layout.Add(columns_layout)
        vertical_layout.Add(self.deck_name, 0)
        vertical_layout.Add(self.create_deck, 0)
        columns_layout.Add(self.results, 0, wx.EXPAND)
        columns_layout.Add(self.selected, 0, wx.EXPAND)
        panel.SetSizer(vertical_layout)
        self.Show()

    @staticmethod
    def _format_word(word):
        """Return the one-line list-box label for ``word``."""
        # Tolerate an entry without definitions rather than raising IndexError.
        definition = word.definitions[0] if word.definitions else ""
        return f"{word.traditional} {convert_pinyin(word.pinyin)} {definition}"

    def OnKeyTyped(self, event):
        """Refresh the result list for the current search text.

        The text is looked up verbatim, first as a pinyin reading and then
        as a traditional headword.  An empty or unmatched query clears the
        results.
        """
        query = event.GetString()
        if query in self.pinyin_idx:
            matches = self.pinyin_idx[query]
        else:
            # Both indexes map a key to a *list* of words, so a headword hit
            # is handled exactly like a pinyin hit.
            matches = self.traditional_idx.get(query, [])
        self.result_words = matches
        self.results.Set([self._format_word(w) for w in matches])

    def OnWordSelected(self, event):
        """Add the clicked search result to the chosen words, once."""
        idx = event.GetEventObject().GetSelection()
        # A selection event can report "nothing selected" (wx.NOT_FOUND, -1);
        # indexing with it would silently pick the last result.
        if not 0 <= idx < len(self.result_words):
            return
        word = self.result_words[idx]
        if word not in self.selected_words:
            # Newest choice goes on top, in both the widget and the list.
            self.selected.InsertItems([self._format_word(word)], 0)
            self.selected_words.insert(0, word)

    def OnWordRemoved(self, event):
        """Remove the clicked entry from the chosen words."""
        idx = event.GetEventObject().GetSelection()
        if not 0 <= idx < len(self.selected_words):
            return
        # Clear the selection before deleting the row it points at.
        self.selected.SetSelection(-1)
        self.selected.Delete(idx)
        del self.selected_words[idx]

    def OnAnk(self, event):
        """Write the chosen words to ``<deck name>.apkg``.

        Spaces in the deck name become hyphens in the file name.  Nothing
        is written when the deck name is empty.
        """
        name = self.deck_name.GetValue()
        if not name.strip():
            # An empty name would produce a file called just ".apkg".
            wx.MessageBox("Please enter a deck name.", "Mandarin Anki",
                          wx.OK | wx.ICON_WARNING, self)
            return
        cards = [generate_card_from_word(w) for w in self.selected_words]
        deck = generate_deck(cards, name)
        # Built outside an f-string: reusing double quotes inside an f-string
        # expression is a syntax error before Python 3.12.
        filename = name.replace(" ", "-") + ".apkg"
        deck.write_to_file(filename)
def main():
    """Start the wx application, open the main window and run the event loop."""
    application = wx.App()
    # Constructing the frame also shows it (MainFrame.__init__ calls Show).
    window = MainFrame()
    application.MainLoop()


if __name__ == '__main__':
    main()