202 lines
6.7 KiB
Python
202 lines
6.7 KiB
Python
import re
|
|
from dataclasses import dataclass
|
|
import json
|
|
import requests
|
|
import wx
|
|
import uuid
|
|
|
|
### Dictionary stuff
|
|
|
|
# Shape of one dictionary entry line:
#   <traditional> <simplified> [<pinyin>] /<definition>/<definition>/
# Capture groups, in order: traditional form, simplified form, the pinyin
# between the square brackets, and the still-unsplit definitions text.
line_regex = (
    r"(\S+)\s(\S+)"       # traditional, simplified
    r"\s\[([^\]]+)\]"     # [pinyin]
    r"\s(.+)"             # /definition/definition/
)
|
|
|
|
@dataclass
class Word:
    """One dictionary entry: both written forms, its reading and its meanings.

    Fields are positional in this order, so construct with
    ``Word(simplified, traditional, pinyin, definitions)``.
    """

    # Simplified-character spelling of the entry.
    simplified: str
    # Traditional-character spelling of the entry.
    traditional: str
    # Pinyin reading exactly as it appears between the brackets of the entry.
    pinyin: str
    # Individual definitions, one string per meaning.
    definitions: list[str]
|
|
|
|
def parse_cedict(path: str) -> "list[Word]":
    """Read a CEDICT dictionary file and return its entries.

    Lines starting with ``#`` (comments/metadata) and blank lines are
    skipped; every other line is handed to ``parse_line``.

    Args:
        path: Location of the UTF-8 encoded dictionary text file.

    Returns:
        The parsed entries, in file order.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    words = []
    # The dictionary is UTF-8 text full of hanzi; relying on the platform's
    # default encoding garbles or rejects it on non-UTF-8 locales.
    with open(path, encoding="utf-8") as file:
        for line in file:
            # Whitespace-only lines carry no entry and would not match the
            # entry pattern, so skip them together with comment lines.
            if line.startswith("#") or not line.strip():
                continue
            words.append(parse_line(line))
    return words
|
|
|
|
def parse_line(line: str):
    """Convert one dictionary entry line into a ``Word``.

    The line is matched against ``line_regex``; a line that does not match
    makes ``re.match`` return ``None`` and the ``.groups()`` call raise
    ``AttributeError``.
    """
    entry = re.match(line_regex, line)
    traditional, simplified, pinyin, raw_definitions = entry.groups()
    # Splitting "/a/b/" on "/" leaves empty strings at both ends; drop them.
    glosses = list(filter(None, raw_definitions.split("/")))
    return Word(
        simplified=simplified,
        traditional=traditional,
        pinyin=pinyin,
        definitions=glosses,
    )
|
|
|
|
def build_pinyin_index(words: list[Word]):
    """Group entries by their normalized pinyin.

    The key is ``normalize_pinyin(word.pinyin)``; the value is the list of
    all entries sharing that key, in the order they appear in ``words``.
    """
    by_reading = {}
    for entry in words:
        # setdefault creates the bucket on first sight of a key.
        by_reading.setdefault(normalize_pinyin(entry.pinyin), []).append(entry)
    return by_reading
|
|
|
|
def build_traditional_index(words: list[Word]):
    """Group entries by their traditional-character spelling.

    The key is ``word.traditional``; the value is the list of all entries
    with that spelling, in the order they appear in ``words``.
    """
    by_hanzi = {}
    for entry in words:
        # setdefault creates the bucket on first sight of a key.
        by_hanzi.setdefault(entry.traditional, []).append(entry)
    return by_hanzi
|
|
|
|
### Tone markers
|
|
|
|
# Tone-marked forms of every vowel. Each value is indexed by tone number
# minus one, so position 0 is tone 1 and position 3 is tone 4.
pinyinToneMarks = {
    'a': 'āáǎà', 'e': 'ēéěè', 'i': 'īíǐì',
    'o': 'ōóǒò', 'u': 'ūúǔù', 'ü': 'ǖǘǚǜ',
    'A': 'ĀÁǍÀ', 'E': 'ĒÉĚÈ', 'I': 'ĪÍǏÌ',
    'O': 'ŌÓǑÒ', 'U': 'ŪÚǓÙ', 'Ü': 'ǕǗǙǛ',
}


def convert_pinyin_callback(match):
    """Return the tone-marked replacement for one numbered syllable tail.

    Args:
        match: A match of the pattern used by ``convert_pinyin``. Group 1 is
            the vowel cluster, group 2 the optional n/g/r ending and group 3
            the tone digit.

    Returns:
        The vowel cluster with the tone mark applied to one vowel, followed
        by the ending. For tone 5 the letters are returned unmarked.
    """
    vowels = match.group(1)
    tone = int(match.group(3))

    # the 5th tone is neutral: drop the digit and leave the letters as they are
    if tone == 5:
        return vowels + match.group(2)

    # for multiple vowels, use first one if it is a/e/o, otherwise use second one
    pos = 0 if len(vowels) == 1 or vowels[0] in 'aeoAEO' else 1
    marked = pinyinToneMarks[vowels[pos]][tone - 1]
    return vowels[:pos] + marked + vowels[pos + 1:] + match.group(2)


def convert_pinyin(s: str) -> str:
    """Convert numbered pinyin (``ni3 hao3``) to tone-marked pinyin (``nǐ hǎo``).

    The two-character spelling ``u:`` is first rewritten to ``ü`` so that
    syllables such as ``nu:3`` or ``lu:e4`` receive their tone mark as well.
    Text that does not look like a numbered syllable is left untouched.
    """
    # "u:" is an ASCII stand-in for ü; without this step the colon sits
    # between the vowel and the tone digit and the syllable never matches.
    s = s.replace('u:', 'ü').replace('U:', 'Ü')
    return re.sub(r'([aeiouü]{1,3})(n?g?r?)([12345])', convert_pinyin_callback, s, flags=re.IGNORECASE)
|
|
|
|
def normalize_pinyin(s: str):
    """Reduce a pinyin string to a lookup key.

    Removes every digit 1-5 and every space character; all other characters,
    including letter case, are kept as they are.
    """
    without_tones = re.sub(r'[12345]', '', s)
    return without_tones.replace(' ', '')
|
|
|
|
### AnkiConnect
|
|
|
|
def add_note(
    hanzi: str,
    pinyin: str,
    definition: str,
    tags: list[str],
    *,
    deck_name: str = "寫漢字",
    model_name: str = "Mandarin Custom",
    timeout: float = 10.0,
):
    """Send an ``addNote`` request to the AnkiConnect endpoint on localhost:8765.

    The request and the decoded response are both printed to stdout.

    Args:
        hanzi: Value for the note's "Character" field.
        pinyin: Value for the note's "Pinyin" field.
        definition: Value for the note's "Definition" field.
        tags: Tags to attach to the note.
        deck_name: Deck that receives the note.
        model_name: Note type (model) to create; it must provide the
            "Pinyin", "Character" and "Definition" fields.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The JSON-decoded response body.

    Raises:
        requests.exceptions.RequestException: If the endpoint cannot be
            reached or does not answer within ``timeout``.
        ValueError: If the response body is not valid JSON.
    """
    request_data = {
        "version": 5,
        "action": "addNote",
        "params": {
            "note": {
                "deckName": deck_name,
                "modelName": model_name,
                "fields": {
                    "Pinyin": pinyin,
                    "Character": hanzi,
                    "Definition": definition,
                },
                "tags": tags,
            }
        },
    }
    # Serialize once and reuse the same text for logging and for the request.
    payload = json.dumps(request_data)
    print(payload)
    # This runs inside a GUI event handler: without a timeout a stalled
    # endpoint would block the call, and with it the window, indefinitely.
    r = requests.post("http://localhost:8765/", data=payload, timeout=timeout)
    # Decode once; the same object is logged and returned.
    result = r.json()
    print(result)
    return result
|
|
|
|
### UI stuff
|
|
|
|
class MainFrame(wx.Frame):
    """Main window: dictionary search on the left, card details on the right.

    Typing in the search box looks the text up by pinyin first and by
    traditional characters second. Selecting a result shows its characters,
    tone-marked pinyin and a checklist of definitions; "Add card" sends the
    checked definitions to ``add_note``.
    """

    def __init__(self, dictionary_path: str = "./cedict_1_0_ts_utf-8_mdbg.txt"):
        """Load the dictionary, build both indexes, lay out and show the window.

        Args:
            dictionary_path: Dictionary file handed to ``parse_cedict``.
        """
        # set up the dictionary
        dictionary = parse_cedict(dictionary_path)
        self.pinyin_idx = build_pinyin_index(dictionary)
        self.traditional_idx = build_traditional_index(dictionary)
        # Not read by any method of this class.
        self.selected_words = []
        # Entries currently listed in self.results, in the same order, so a
        # list-box row number can be used directly as an index here.
        self.result_words = []
        # Entry shown in the detail column. It must exist before the first
        # "Add card" click, which can happen before any result was selected.
        self.selected_word = None

        super().__init__(parent=None, title='Mandarin Anki', size=(460, 400))
        panel = wx.Panel(self)
        vertical_layout = wx.BoxSizer(wx.VERTICAL)
        results_vertical_layout = wx.BoxSizer(wx.VERTICAL)
        columns_layout = wx.BoxSizer(wx.HORIZONTAL)

        # search and results
        search_label = wx.StaticText(panel, label="Search:")
        self.search = wx.TextCtrl(panel, size=(200, -1))
        self.search.Bind(wx.EVT_TEXT, self.OnKeyTyped)

        self.results = wx.ListBox(panel, size=(200, 200))
        self.results.Bind(wx.EVT_LISTBOX, self.OnWordSelected)

        # detail column for the selected entry
        self.selected_hanzi = wx.StaticText(panel, label="")
        self.selected_pinyin = wx.StaticText(panel, label="")
        self.selected_definitions = wx.CheckListBox(panel, size=(200, 200))
        tags_label = wx.StaticText(panel, label="Tags (comma separated):")
        self.tags = wx.TextCtrl(panel, size=(200, -1))
        results_vertical_layout.Add(self.selected_hanzi, 0)
        results_vertical_layout.Add(self.selected_pinyin, 0)
        results_vertical_layout.Add(self.selected_definitions, 0)
        results_vertical_layout.Add(tags_label)
        results_vertical_layout.Add(self.tags, 0)

        self.add_card = wx.Button(panel, label="Add card")
        self.add_card.Bind(wx.EVT_BUTTON, self.OnAddCard)
        results_vertical_layout.Add(self.add_card, 0)

        vertical_layout.Add(search_label, 0)
        vertical_layout.Add(self.search, 0)
        vertical_layout.Add(columns_layout)
        columns_layout.Add(self.results, 0, wx.EXPAND)
        columns_layout.Add(results_vertical_layout, 0, wx.EXPAND)

        panel.SetSizer(vertical_layout)
        self.Show()

    def _show_results(self, matches):
        """Fill the results list with ``matches`` and remember them by row."""
        self.results.Set(
            [f"{w.traditional} {convert_pinyin(w.pinyin)} {w.definitions[0]}" for w in matches]
        )
        self.result_words = matches

    def OnKeyTyped(self, event):
        """Refresh the results list whenever the search text changes."""
        query = event.GetString()
        matches = []
        if query:
            # Pinyin lookup wins; an exact traditional-character match is
            # the fallback. Index buckets are never empty, so `or` only
            # falls through on a missing key.
            matches = (
                self.pinyin_idx.get(normalize_pinyin(query))
                or self.traditional_idx.get(query)
                or []
            )
        # Always redraw, so an emptied or non-matching query never leaves
        # stale rows on screen.
        self._show_results(matches)

    def OnWordSelected(self, event):
        """Show the entry behind the clicked results row in the detail column."""
        index = event.GetEventObject().GetSelection()
        # With nothing selected the list box reports wx.NOT_FOUND (-1);
        # used as a list index that would silently pick the last result.
        if not 0 <= index < len(self.result_words):
            return
        word = self.result_words[index]
        self.selected_word = word
        self.selected_hanzi.SetLabel(word.traditional)
        self.selected_pinyin.SetLabel(convert_pinyin(word.pinyin))
        self.selected_definitions.Clear()
        self.selected_definitions.InsertItems(word.definitions, 0)

    def OnAddCard(self, event):
        """Send the selected entry with its checked definitions to ``add_note``.

        Does nothing when no entry is selected or no definition is checked.
        """
        if self.selected_word is None or not self.selected_definitions.GetCheckedItems():
            return
        print("adding note " + self.selected_word.traditional)
        # Drop empty pieces so a blank tags box or a trailing comma does not
        # produce "" as a tag.
        tags = [tag.strip() for tag in self.tags.GetValue().split(",") if tag.strip()]
        add_note(
            self.selected_word.traditional,
            convert_pinyin(self.selected_word.pinyin),
            ", ".join(self.selected_definitions.GetCheckedStrings()),
            tags,
        )
|
|
|
|
# Script entry point: start the GUI only when this file is run directly.
if __name__ == "__main__":
    app = wx.App()
    # MainFrame shows itself at the end of its constructor; the name keeps a
    # reference to the window while the application runs.
    frame = MainFrame()
    # Enter the wx event loop.
    app.MainLoop()
|