Skip to content

Instantly share code, notes, and snippets.

@Luigi-Pizzolito
Created February 10, 2024 22:02
Show Gist options
  • Save Luigi-Pizzolito/a80c60f7c8c0b1255b7fcbc7f42e7189 to your computer and use it in GitHub Desktop.
Save Luigi-Pizzolito/a80c60f7c8c0b1255b7fcbc7f42e7189 to your computer and use it in GitHub Desktop.
Uses JMDict to convert KANJIDIC2.xml Kanji dictionary into a JSON file containing kanji and their most common Katakana pronunciation. See http://nihongo.monash.edu/kanjidic2/index.html
package main
import (
"bufio"
"os"
"fmt"
// "foosoft.net/projects/jmdict"
"git.foosoft.net/alex/jmdict"
"encoding/json"
)
// main reads kanjidic2.xml from the working directory, builds a map from each
// kanji literal to the first of its readings whose type is "ja_on", and writes
// that map as a JSON object to kanjidic2_pronounce.json.
//
// Characters without a reading/meaning section or without any "ja_on" reading
// are omitted from the output. Any failure is reported on stdout and aborts
// the run before the output file is produced or reported as written.
func main() {
	// Open the KANJIDIC2 source file.
	file, err := os.Open("kanjidic2.xml")
	if err != nil {
		fmt.Println("Error:", err)
		return
	}
	defer file.Close()

	// Parse the dictionary through a buffered reader.
	fmt.Println("Loading Kanji Dictionary")
	kanjidic, err := jmdict.LoadKanjidic(bufio.NewReader(file))
	if err != nil {
		// Without this check a malformed or truncated input would silently
		// yield an empty or partial JSON file.
		fmt.Println("Error loading Kanji dictionary:", err)
		return
	}

	// Collect the first "ja_on" reading of every character that has one.
	pronounceMap := make(map[string]string)
	for _, kanji := range kanjidic.Characters {
		if kanji.ReadingMeaning == nil {
			continue
		}
		// Ranging over a nil slice is a no-op, so no explicit nil check is needed.
		for _, option := range kanji.ReadingMeaning.Readings {
			if option.Type != "ja_on" {
				continue
			}
			fmt.Println(kanji.Literal, "->", option.Value)
			pronounceMap[kanji.Literal] = option.Value
			break // only the first matching reading is kept
		}
	}
	fmt.Println("Extraction done")

	// Convert the data to JSON.
	jsonData, err := json.Marshal(pronounceMap)
	if err != nil {
		fmt.Println("Error marshalling JSON:", err)
		return
	}

	// Write the JSON data to a file. The output uses its own variable so it is
	// not confused with the input file handle above.
	out, err := os.Create("kanjidic2_pronounce.json")
	if err != nil {
		fmt.Println("Error creating file:", err)
		return
	}
	if _, err := out.Write(jsonData); err != nil {
		out.Close() // best effort; the write error is the one worth reporting
		fmt.Println("Error writing JSON to file:", err)
		return
	}
	// Close explicitly: a failed close can mean the data never reached disk.
	if err := out.Close(); err != nil {
		fmt.Println("Error writing JSON to file:", err)
		return
	}
	fmt.Println("JSON data written to kanjidic2_pronounce.json")
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment