|
import csv |
|
import hashlib |
|
import json |
|
import random |
|
import time |
|
import sqlite3 as sql |
|
import zipfile |
|
|
|
# sqlite3.Row subclass with a readable, dict-style repr (used as the row factory below)
|
class Row(sql.Row):
    """``sqlite3.Row`` subclass whose ``repr`` shows the row as a plain dict.

    Only ``__repr__`` is overridden; indexing, ``keys()`` and iteration are
    inherited unchanged from ``sqlite3.Row``.
    """

    def __repr__(self) -> str:
        """Return the ``repr`` of this row converted to a ``dict``."""
        return repr(dict(self))
|
|
|
# static definitions |
|
|
|
# Card templates. ``{{name}}`` placeholders refer to the note fields listed in
# FIELDS; both strings are stored verbatim in the model's template entry.

# Question side: the question id and text, then the four answer choices.
QFMT = r'''
<h1>
<span class="question_id">{{question_id}}</span>:
<span class="question">{{question}}</span>
</h1>
<p><ol>
<li>{{answer_1}}</li>
<li>{{answer_2}}</li>
<li>{{answer_3}}</li>
<li>{{answer_4}}</li>
</ol></p>
'''.strip()

# Answer side: the question side, a rule, then the correct letter and answer.
AFMT = QFMT + '\n' + r'''
<hr/>
<p class="answer">{{correct_letter}}. {{correct_answer}}</p>
'''.strip()
|
# Stylesheet stored with the model; it styles the classes and elements used
# by QFMT/AFMT (question id, question text, answer list, answer line).
CSS = r'''
.question_id {
font-weight: bold;
text-decoration: underline;
}
.question {
font-weight: bold;
}
h1 {
border-bottom: 1px solid #888;
}
ol {
list-style-type: upper-alpha;
}
.answer {
font-weight: bold;
}
'''.strip()
|
# Note field names, in storage order: the model's field list is generated
# from this list (ord = position) and each note's ``flds`` value is joined in
# the same order, so the two must stay in sync.
FIELDS = [
    'question_id',
    'question',
    'answer_1',
    'answer_2',
    'answer_3',
    'answer_4',
    'correct_letter',
    'correct_answer',
]
|
# Schema script run once against the fresh database: creates the five tables
# (cards, col, graves, notes, revlog) and their indexes. The ``--`` comments
# inside the string are SQL comments and are part of the script text.
INIT_SQL = r'''
-- Cards are what you review.
-- There can be multiple cards for each note, as determined by the Template.
CREATE TABLE cards (
-- the epoch milliseconds of when the card was created
id integer primary key,
-- notes.id
nid integer not null,--
-- deck id (available in col table)
did integer not null,
-- ordinal : identifies which of the card templates or cloze deletions it corresponds to
-- for card templates, valid values are from 0 to num templates - 1
-- for cloze deletions, valid values are from 0 to max cloze index - 1 (they're 0 indexed despite the first being called `c1`)
ord integer not null,
-- modification time as epoch seconds
mod integer not null,
-- update sequence number : used to figure out diffs when syncing.
-- value of -1 indicates changes that need to be pushed to server.
-- usn < server usn indicates changes that need to be pulled from server.
usn integer not null,
-- 0=new, 1=learning, 2=review, 3=relearning
type integer not null,
-- -3=user buried(In scheduler 2),
-- -2=sched buried (In scheduler 2),
-- -2=buried(In scheduler 1),
-- -1=suspended,
-- 0=new, 1=learning, 2=review (as for type)
-- 3=in learning, next rev in at least a day after the previous review
-- 4=preview
queue integer not null,
-- Due is used differently for different card types:
-- new: the order in which cards are to be studied; starts from 1.
-- learning/relearning: epoch timestamp in seconds
-- review: days since the collection's creation time
due integer not null,
-- interval (used in SRS algorithm). Negative = seconds, positive = days
-- v2 scheduler used seconds for (re)learning cards and days for review cards
-- v3 scheduler uses seconds only for intraday (re)learning cards and days for interday (re)learning cards and review cards
ivl integer not null,
-- The ease factor of the card in permille (parts per thousand). If the ease factor is 2500, the card’s interval will be multiplied by 2.5 the next time you press Good.
factor integer not null,
-- number of reviews
reps integer not null,
-- the number of times the card went from a "was answered correctly"
-- to "was answered incorrectly" state
lapses integer not null,
-- of the form a*1000+b, with:
-- a the number of reps left today
-- b the number of reps left till graduation
-- for example: '2004' means 2 reps left today and 4 reps till graduation
left integer not null,
-- original due: In filtered decks, it's the original due date that the card had before moving to filtered.
-- If the card lapsed in scheduler1, then it's the value before the lapse. (This is used when switching to scheduler 2. At this time, cards in learning becomes due again, with their previous due date)
-- In any other case it's 0.
odue integer not null,
-- original did: only used when the card is currently in filtered deck
odid integer not null,
-- an integer. This integer mod 8 represents a "flag", which can be see in browser and while reviewing a note. Red 1, Orange 2, Green 3, Blue 4, no flag: 0. This integer divided by 8 represents currently nothing
flags integer not null,
-- currently unused
data text not null
);

-- col contains a single row that holds various information about the collection
CREATE TABLE col (
-- arbitrary number since there is only one row
id integer primary key,
-- timestamp of the creation date in second. It's correct up to the day. For V1 scheduler, the hour corresponds to starting a new day. By default, new day is 4.
crt integer not null,
-- last modified in milliseconds
mod integer not null,
-- schema mod time: time when "schema" was modified.
-- If server scm is different from the client scm a full-sync is required
scm integer not null,
-- version
ver integer not null,
-- dirty: unused, set to 0
dty integer not null,
-- update sequence number: used for finding diffs when syncing.
-- See usn in cards table for more details.
usn integer not null,
-- "last sync time"
ls integer not null,
-- json object containing configuration options that are synced. Described below in "configuration JSONObjects"
conf text not null,
-- json object of json object(s) representing the models (aka Note types)
-- keys of this object are strings containing integers: "creation time in epoch milliseconds" of the models
-- values of this object are other json objects of the form described below in "Models JSONObjects"
models text not null,
-- json object of json object(s) representing the deck(s)
-- keys of this object are strings containing integers: "deck creation time in epoch milliseconds" for most decks, "1" for the default deck
-- values of this object are other json objects of the form described below in "Decks JSONObjects"
decks text not null,
-- json object of json object(s) representing the options group(s) for decks
-- keys of this object are strings containing integers: "options group creation time in epoch milliseconds" for most groups, "1" for the default option group
-- values of this object are other json objects of the form described below in "DConf JSONObjects"
dconf text not null,
-- a cache of tags used in the collection (This list is displayed in the browser. Potentially at other place)
tags text not null
);

-- Contains deleted cards, notes, and decks that need to be synced.
-- usn should be set to -1,
-- oid is the original id.
-- type: 0 for a card, 1 for a note and 2 for a deck
CREATE TABLE graves (
usn integer not null,
oid integer not null,
type integer not null
);

-- Notes contain the raw information that is formatted into a number of cards
-- according to the models
CREATE TABLE notes (
-- epoch milliseconds of when the note was created
id integer primary key,
-- globally unique id, almost certainly used for syncing
guid text not null,
-- model id
mid integer not null,
-- modification timestamp, epoch seconds
mod integer not null,
-- update sequence number: for finding diffs when syncing.
-- See the description in the cards table for more info
usn integer not null,
-- space-separated string of tags.
-- includes space at the beginning and end, for LIKE "% tag %" queries
tags text not null,
-- the values of the fields in this note. separated by 0x1f (31) character.
flds text not null,
-- sort field: used for quick sorting and duplicate check. The sort field is an integer so that when users are sorting on a field that contains only numbers, they are sorted in numeric instead of lexical order. Text is stored in this integer field.
sfld integer not null,
-- field checksum used for duplicate check.
-- integer representation of first 8 digits of sha1 hash of the first field
csum integer not null,
-- unused
flags integer not null,
-- unused
data text not null
);

-- revlog is a review history; it has a row for every review you've ever done!
CREATE TABLE revlog (
-- epoch-milliseconds timestamp of when you did the review
id integer primary key,
-- cards.id
cid integer not null,
-- update sequence number: for finding diffs when syncing.
-- See the description in the cards table for more info
usn integer not null,
-- which button you pushed to score your recall.
-- review: 1(wrong), 2(hard), 3(ok), 4(easy)
-- learn/relearn: 1(wrong), 2(ok), 3(easy)
ease integer not null,
-- interval (i.e. as in the card table)
ivl integer not null,
-- last interval (i.e. the last value of ivl. Note that this value is not necessarily equal to the actual interval between this review and the preceding review)
lastIvl integer not null,
-- factor
factor integer not null,
-- how many milliseconds your review took, up to 60000 (60s)
time integer not null,
-- 0=learn, 1=review, 2=relearn, 3=filtered, 4=manual
type integer not null
);

CREATE INDEX ix_cards_nid on cards (nid);
CREATE INDEX ix_cards_sched on cards (did, queue, due);
CREATE INDEX ix_cards_usn on cards (usn);
CREATE INDEX ix_notes_csum on notes (csum);
CREATE INDEX ix_notes_usn on notes (usn);
CREATE INDEX ix_revlog_cid on revlog (cid);
CREATE INDEX ix_revlog_usn on revlog (usn);
'''.strip()
|
|
|
# take input |
|
|
|
# SQLite file to create; it is later stored in the package as the collection.
db_filename = input('Database filename: ').strip()
dbw = sql.connect(db_filename)
dbw.row_factory = Row
db = dbw.cursor()

# The question bank is opened here and closed by the ``with csv_file:`` block
# that reads it. newline='' leaves line-ending handling to the csv module.
csv_file = open(
    input('Question bank filename: ').strip(),
    'r', encoding='windows-1252', newline='',
)

# Any non-blank reply selects the ``*_french`` CSV columns; a blank reply
# selects the ``*_english`` ones.
if input('Type anything if you want French instead of English:').strip():
    suffix = '_french'
else:
    suffix = '_english'
deck_name = input('Deck name: ').strip()
deck_desc = input('Deck description: ').strip()
|
|
|
# set up ID generation |
|
|
|
# One timestamp, taken once, feeds every generated id and modification time.
now_ms = int(time.time() * 1000)  # epoch milliseconds
now = now_ms // 1000  # the same instant in whole epoch seconds
|
|
|
# set up collection data |
|
|
|
# The single row of the ``col`` table, keyed by column name. Values that are
# not str/int (the nested dicts) are serialised to JSON when the row is
# inserted.
col = {
    'id': 1,
    'crt': now,
    'mod': now_ms,
    'scm': now_ms,
    'ver': 11,
    'dty': 0,
    'usn': 1,
    'ls': 0,
    'conf': {},
    # One note type ("model"), keyed by its id as a string.
    'models': {str(now_ms): dict(
        id=now_ms,
        name='Question',
        type=0,
        mod=now,
        usn=1,
        sortf=0,
        did=now_ms,
        # A single card template, so every note yields exactly one card.
        tmpls=[dict(
            name='Question',
            ord=0,
            qfmt=QFMT,
            afmt=AFMT,
            bqfmt='',
            bafmt='',
            did=None,
        )],
        # One field definition per entry of FIELDS, in the same order.
        flds=[dict(
            name=name,
            ord=i,
            sticky=False,
            rtl=False,
            font='sans-serif',
            size=16,
        ) for i, name in enumerate(FIELDS)],
        css=CSS,
        latexPre=r'''
\documentclass[16pt]{article}
\special{papersize=3in,5in}
\usepackage[utf8]{inputenc}
\usepackage{amssymb,amsmath}
\pagestyle{empty}
\setlength{\parindent}{0in}
\begin{document}
'''.strip(),
        latexPost=r'\end{document}',
        req=[[0, 'all', list(range(len(FIELDS)))]],
        tags=[],
        vers=[],
    )},
    # Two decks: the generated one (keyed by its id) and the Default deck "1".
    'decks': {str(now_ms): dict(
        id=now_ms,
        mod=now,
        name=deck_name,
        desc=deck_desc,
        extendNew=0,
        extendRev=0,
        usn=-1,
        newToday=[0, 0],
        revToday=[0, 0],
        lrnToday=[0, 0],
        timeToday=[0, 0],
        dyn=0,
        conf=1,
        collapsed=True,
        # Was misspelled "browsereCollapsed"; now matches the key used by the
        # Default deck entry below.
        browserCollapsed=True,
    ), '1': dict(
        id=1,
        mod=0,
        name='Default',
        usn=0,
        lrnToday=[0, 0],
        revToday=[0, 0],
        newToday=[0, 0],
        timeToday=[0, 0],
        collapsed=True,
        browserCollapsed=True,
        desc='',
        dyn=0,
        conf=1,
        extendNew=0,
        extendRev=0,
    )},
    'dconf': {},
    'tags': {}
}
|
|
|
# set up notes data |
|
|
|
# One dict per CSV row, keyed by the column names of the ``notes`` table.
notes = []
with csv_file:
    reader = csv.DictReader(csv_file, delimiter=';', quoting=csv.QUOTE_NONE)
    for i, row in enumerate(reader):
        # idxs is the shuffled display order of the four answers; index 0
        # always denotes the correct answer (answers[0]).
        idxs = list(range(4))
        random.shuffle(idxs)
        answers = [row['correct_answer' + suffix]] + [
            row[f'incorrect_answer_{n}{suffix}'] for n in range(1, 4)]

        # yes, the trailing space in the column name is required
        question_id = row['question_id ']

        notes.append(dict(
            id=now_ms + i,
            # The shuffle order is part of the guid.
            guid=question_id + ':' + ''.join(map(str, idxs)),
            mid=now_ms,
            mod=now,
            usn=1,
            # Single tag: the second '-'-separated part of the id, wrapped
            # in spaces.
            tags=' ' + question_id.split('-')[1] + ' ',
            # Field values in FIELDS order, separated by 0x1f.
            flds='\x1f'.join((
                question_id,
                row['question' + suffix],
                *(answers[j] for j in idxs),
                # Letter of the position the correct answer was shuffled to.
                'ABCD'[idxs.index(0)],
                answers[0]
            )),
            sfld=question_id,
            # First 8 hex digits of the SHA-1 of the first field, as an int.
            # UTF-8 yields the same bytes as ASCII for ASCII ids and cannot
            # raise UnicodeEncodeError on anything else.
            csum=int(hashlib.sha1(question_id.encode('utf-8')).hexdigest()[:8], 16),
            flags=0,
            data=''
        ))
|
|
|
# make database transactions |
|
|
|
# Create the schema, insert the collection row and the notes, then derive one
# card per note. try/finally closes the cursor and connection even when a
# statement fails.
try:
    db.executescript(INIT_SQL)

    # Named placeholders (":key") are generated from col's own keys; nested
    # values that are not str/int are stored as JSON text.
    db.execute(
        f'INSERT INTO col ({", ".join(col.keys())}) '
        f'VALUES ({", ".join(":" + key for key in col.keys())})',
        {key: value if isinstance(value, (str, int))
         else json.dumps(value) for key, value in col.items()}
    )

    db.executemany('INSERT INTO notes VALUES (:id, :guid, :mid, :mod, :usn, :tags, :flds, :sfld, :csum, :flags, :data)', notes)
    dbw.commit()

    # Each card reuses its note's id and mod; ``due`` is the 1-based position
    # of the note, recovered from id - now_ms + 1 (note ids are now_ms + i).
    db.execute(r'''
        INSERT INTO cards (
        id, nid, did, ord, mod, usn, type, queue, due,
        ivl, factor, reps, lapses, left, odue, odid, flags, data
        )
        SELECT
        id, id AS nid, :now_ms AS did, 0 AS ord, mod, 1 AS usn, 0 AS type,
        0 AS queue, (id - :now_ms + 1) AS due, 0 AS ivl, 0 AS factor, 0 AS reps, 0 AS lapses,
        1004 AS left, 0 AS odue, 0 AS odid, 0 AS flags, '' AS data
        FROM notes;
        ''', dict(now_ms=now_ms))
    dbw.commit()
finally:
    db.close()
    dbw.close()
|
|
|
# collate apkg file |
|
|
|
# Package the finished database as ``out.apkg``: the SQLite file goes in under
# the name ``collection.anki2`` and ``media`` is an empty JSON object.
with zipfile.ZipFile('out.apkg', 'w') as zfile:
    zfile.write(db_filename, 'collection.anki2')
    zfile.writestr('media', b'{}')