Created
February 10, 2024 22:02
-
-
Save Luigi-Pizzolito/a80c60f7c8c0b1255b7fcbc7f42e7189 to your computer and use it in GitHub Desktop.
Uses JMDict to convert KANJIDIC2.xml Kanji dictionary into a JSON file containing kanji and their most common Katakana pronunciation. See http://nihongo.monash.edu/kanjidic2/index.html
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package main | |
import ( | |
"bufio" | |
"os" | |
"fmt" | |
// "foosoft.net/projects/jmdict" | |
"git.foosoft.net/alex/jmdict" | |
"encoding/json" | |
) | |
func main() { | |
// Open the file | |
file, err := os.Open("kanjidic2.xml") | |
if err != nil { | |
fmt.Println("Error:", err) | |
return | |
} | |
defer file.Close() | |
// Wrap the file descriptor with bufio.NewReader | |
reader := bufio.NewReader(file) | |
// Load the character dictionary | |
fmt.Println("Loading Kanji Dictionary") | |
kanjidic, err := jmdict.LoadKanjidic(reader) | |
// Iterate and copy to map | |
pronounceMap := make(map[string]string) | |
for _, kanji := range kanjidic.Characters { | |
if kanji.ReadingMeaning != nil { | |
reading := "" | |
readings := kanji.ReadingMeaning.Readings | |
if readings != nil { | |
for _, option := range readings { | |
if option.Type == "ja_on" { | |
reading = option.Value | |
fmt.Println(kanji.Literal, "->", reading) | |
pronounceMap[kanji.Literal] = reading | |
break | |
} | |
} | |
} | |
} | |
} | |
fmt.Println("Extraction done") | |
// Convert the data to JSON | |
jsonData, err := json.Marshal(pronounceMap) | |
if err != nil { | |
fmt.Println("Error marshalling JSON:", err) | |
return | |
} | |
// Write the JSON data to a file | |
file, err = os.Create("kanjidic2_pronounce.json") | |
if err != nil { | |
fmt.Println("Error creating file:", err) | |
return | |
} | |
defer file.Close() | |
_, err = file.Write(jsonData) | |
if err != nil { | |
fmt.Println("Error writing JSON to file:", err) | |
return | |
} | |
fmt.Println("JSON data written to kanjidic2_pronounce.json") | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment