Skip to content

Instantly share code, notes, and snippets.

@kevgs
Created March 3, 2020 11:23
Show Gist options
  • Save kevgs/966f7c7afb869dab46a565bea429eb87 to your computer and use it in GitHub Desktop.
Save kevgs/966f7c7afb869dab46a565bea429eb87 to your computer and use it in GitHub Desktop.
diff --git a/storage/innobase/include/log0log.h b/storage/innobase/include/log0log.h
index 4808d91e275..a20304b0937 100644
--- a/storage/innobase/include/log0log.h
+++ b/storage/innobase/include/log0log.h
@@ -52,13 +52,11 @@ step which modifies the database, is started */
#define LOG_CHECKPOINT_FREE_PER_THREAD (4U << srv_page_size_shift)
#define LOG_CHECKPOINT_EXTRA_FREE (8U << srv_page_size_shift)
-typedef ulint (*log_checksum_func_t)(const byte* log_block);
-
/** this is where redo log data is stored (no header, no checkpoints) */
static const char LOG_DATA_FILE_NAME[] = "ib_logdata";
/** creates LOG_DATA_FILE_NAME with specified size */
-dberr_t create_data_file(os_offset_t size);
+dberr_t create_log_file(const char *path, os_offset_t size);
static const char LOG_FILE_NAME_PREFIX[] = "ib_logfile";
static const char LOG_FILE_NAME[] = "ib_logfile0";
@@ -913,6 +911,63 @@ extern os_event_t log_scrub_event;
/** Whether log_scrub_thread is active */
extern bool log_scrub_thread_active;
+namespace redo
+{
+
+/** Prototype of a redo log made of two files: a "main" file that receives
+a format header and appended checkpoint records, and a fixed-size "data"
+file that is written circularly with mini-transaction records. */
+class redo_t
+{
+  static const char DATA_FILE_NAME[];
+  static const char MAIN_FILE_NAME[];
+
+  static const unsigned BIT_SET= 1;
+  static const unsigned BIT_UNSET= 0;
+
+  /** Type tag stored in the first byte of a main file record */
+  enum class record_type_t : byte {
+    CHECKPOINT = 0,
+    FILE_OPERATION = 1,
+  };
+
+  /** Number of the next checkpoint to append */
+  uint64_t m_checkpoint = 0;
+
+  log_file_t m_main_file;
+  /** Offset in m_main_file where the next record is appended */
+  os_offset_t m_main_file_tail = 0;
+
+  log_file_t m_data_file;
+  /** Offset in m_data_file where the next write starts */
+  os_offset_t m_data_file_position = 0;
+  /** Size of m_data_file in bytes; writes wrap around to 0 at this offset */
+  os_offset_t m_data_file_size = 0;
+  /** Toggled by append_wrapped() when the write position wraps around;
+  stored in the header of each data record and in each checkpoint record */
+  unsigned m_sequence_bit : 1;
+
+  /** Held by append_mtr_data() and append_checkpoint() */
+  std::mutex m_mutex;
+
+public:
+  /** A bit-field cannot take a default member initializer before C++20,
+  so the sequence bit is given its defined starting value here. */
+  redo_t() : m_sequence_bit(BIT_UNSET) {}
+
+  /** Create both redo log files and write the header of the main file
+  @param[in] data_file_size size of the data file in bytes
+  @return DB_SUCCESS or an error code */
+  static dberr_t create_files(os_offset_t data_file_size);
+  /** Write initial info to newly created files */
+  dberr_t initialize_files();
+
+  /** Thread unsafe! */
+  dberr_t open_files();
+  /** Thread unsafe! */
+  dberr_t close_files();
+
+  /** Append the log of one mini-transaction to the data file
+  @param[in] buf mini-transaction log
+  @return DB_SUCCESS or an error code */
+  dberr_t append_mtr_data(const mtr_buf_t &buf);
+  /** Calls fdatasync() or similar */
+  dberr_t flush_data() { return m_data_file.flush_data_only(); }
+
+  /** Append a checkpoint record to the main file
+  @return DB_SUCCESS or an error code */
+  dberr_t append_checkpoint();
+
+private:
+  /** Toggle the sequence bit. XOR keeps the value within the single bit
+  instead of relying on truncation of the promoted result of operator~ */
+  void flip_sequence_bit() { m_sequence_bit^= 1; }
+
+  /** Write buf at the current data file position, continuing from offset 0
+  when the end of the file is reached */
+  dberr_t append_wrapped(span<byte> buf);
+  /** Read buf.size() bytes starting at offset, continuing from offset 0
+  when the end of the file is reached */
+  dberr_t read_wrapped(os_offset_t offset, span<byte> buf);
+};
+
+extern redo_t new_redo;
+
+} // namespace redo
+
#include "log0log.ic"
#endif
diff --git a/storage/innobase/log/log0log.cc b/storage/innobase/log/log0log.cc
index fea6922efb2..daf7844a066 100644
--- a/storage/innobase/log/log0log.cc
+++ b/storage/innobase/log/log0log.cc
@@ -1243,6 +1243,7 @@ void log_write_up_to(lsn_t lsn, bool flush_to_disk, bool rotate_key)
if (!log_sys.log.data_writes_are_durable())
{
log_write_flush_to_disk_low(flush_lsn);
+ redo::new_redo.flush_data();
}
flush_lock.release(flush_lsn);
@@ -1588,6 +1589,7 @@ bool log_checkpoint()
log_sys.next_checkpoint_lsn = oldest_lsn;
log_write_checkpoint_info(end_lsn);
+ redo::new_redo.append_checkpoint();
ut_ad(!log_mutex_own());
return(true);
@@ -2194,16 +2196,15 @@ std::vector<std::string> get_existing_log_files_paths() {
return result;
}
-dberr_t create_data_file(os_offset_t size)
+dberr_t create_log_file(const char *path, os_offset_t size)
{
- ut_ad(size > LOG_MAIN_FILE_SIZE);
+ ut_ad(path);
- const auto path= get_log_file_path(LOG_DATA_FILE_NAME);
- os_file_delete_if_exists(innodb_log_file_key, path.c_str(), nullptr);
+ os_file_delete_if_exists(innodb_log_file_key, path, nullptr);
bool ret;
pfs_os_file_t file=
- os_file_create(innodb_log_file_key, path.c_str(),
+ os_file_create(innodb_log_file_key, path,
OS_FILE_CREATE | OS_FILE_ON_ERROR_NO_EXIT, OS_FILE_NORMAL,
OS_LOG_FILE, srv_read_only_mode, &ret);
@@ -2215,7 +2216,7 @@ dberr_t create_data_file(os_offset_t size)
ib::info() << "Setting log file " << path << " size to " << size << " bytes";
- ret= os_file_set_size(path.c_str(), file, size);
+ ret= os_file_set_size(path, file, size);
if (!ret)
{
os_file_close(file);
@@ -2236,3 +2237,188 @@ dberr_t create_data_file(os_offset_t size)
return DB_SUCCESS;
}
+
+namespace redo
+{
+
+redo_t new_redo; // the instance declared extern in log0log.h
+
+const char redo_t::DATA_FILE_NAME[]= "new_ib_logdata"; // file behind m_data_file (mini-transaction records)
+const char redo_t::MAIN_FILE_NAME[]= "new_ib_logfile0"; // file behind m_main_file (header and checkpoint records)
+
+/** Create the data file and the main file, then write the format header
+into the main file and flush it.
+@param[in] data_file_size size of the data file in bytes
+@return DB_SUCCESS or the first error encountered */
+dberr_t redo_t::create_files(os_offset_t data_file_size)
+{
+  // The main file starts out as exactly one header block.
+  constexpr size_t header_size= 512;
+
+  const std::string data_path= get_log_file_path(DATA_FILE_NAME);
+  dberr_t err= create_log_file(data_path.c_str(), data_file_size);
+  if (err)
+    return err;
+
+  std::string main_path= get_log_file_path(MAIN_FILE_NAME);
+  err= create_log_file(main_path.c_str(), header_size);
+  if (err)
+    return err;
+
+  log_file_t main_file(std::move(main_path));
+  err= main_file.open(false);
+  if (err)
+    return err;
+
+  // Zero-filled block; only the fields below are set explicitly.
+  std::array<byte, header_size> header= {0};
+  byte *const block= header.data();
+  mach_write_to_4(block + LOG_HEADER_FORMAT,
+                  srv_encrypt_log ? log_t::FORMAT_ENC_10_5
+                                  : log_t::FORMAT_10_5);
+  mach_write_to_4(block + LOG_HEADER_SUBFORMAT, 2);
+  mach_write_to_4(block + LOG_HEADER_START_LSN, 0);
+  strcpy(reinterpret_cast<char *>(block) + LOG_HEADER_CREATOR,
+         LOG_HEADER_CREATOR_CURRENT);
+  log_block_store_checksum(block);
+
+  err= main_file.write(0, header);
+  if (err)
+    return err;
+  err= main_file.flush_data_only();
+  if (err)
+    return err;
+  err= main_file.close();
+  if (err)
+    return err;
+
+  return DB_SUCCESS;
+}
+
+dberr_t redo_t::initialize_files() { // placeholder: writes nothing yet and always reports success
+ return DB_SUCCESS;
+}
+
+/** Open the main file and the data file and reset the append positions.
+Thread unsafe!
+@return DB_SUCCESS or the error from opening either file */
+dberr_t redo_t::open_files()
+{
+  const std::string main_path= get_log_file_path(MAIN_FILE_NAME);
+  m_main_file= log_file_t(main_path.c_str());
+  // Appending continues at the current end of the main file.
+  // NOTE(review): the result of os_file_get_size() is not checked for
+  // failure here -- confirm what m_total_size holds in that case.
+  m_main_file_tail= os_file_get_size(main_path.c_str()).m_total_size;
+  const dberr_t main_err= m_main_file.open(false);
+  if (main_err)
+    return main_err;
+
+  const std::string data_path= get_log_file_path(DATA_FILE_NAME);
+  m_data_file= log_file_t(data_path.c_str());
+  // Writing always restarts at the beginning of the data file.
+  m_data_file_position= 0;
+  m_data_file_size= os_file_get_size(data_path.c_str()).m_total_size;
+
+  return m_data_file.open(false);
+}
+
+/** Close the main file and the data file. Thread unsafe!
+Both files are always closed: a failure to close the main file must not
+leave the data file open.
+@return the error from closing the main file if there was one, otherwise
+the result of closing the data file */
+dberr_t redo_t::close_files()
+{
+  const dberr_t main_err= m_main_file.close();
+  const dberr_t data_err= m_data_file.close();
+  return main_err ? main_err : data_err;
+}
+
+struct mtr_functor_t // callable handed to mtr_buf_t::for_each_block(); concatenates the visited blocks into m_buf
+{
+ std::vector<byte> m_buf; // accumulated bytes; append_mtr_data() puts the record header here before iterating
+
+ bool operator()(const mtr_buf_t::block_t *block)
+ {
+ m_buf.insert(m_buf.end(), block->begin(), block->end()); // append the bytes of this block
+ return true; // always true; presumably tells for_each_block() to keep going -- TODO confirm
+ }
+};
+
+/** Build one data record from a mini-transaction log and append it to the
+data file. The record is: varint header (payload size shifted left by two,
+skip bit, sequence bit), the payload, and a 4-byte CRC-32.
+@param[in] payload mini-transaction log to append
+@return DB_SUCCESS or the error returned by append_wrapped() */
+dberr_t redo_t::append_mtr_data(const mtr_buf_t &payload)
+{
+  const uint32_t payload_size= payload.size();
+  // The two low bits of the header value carry flags, so the size must
+  // survive a shift by two without losing its top bits.
+  ut_ad(payload_size < (1U << 30));
+
+  std::array<byte, 8> header_buf;
+  const size_t skip_bit= 0; // do not skip
+  const size_t sequence_bit= 0; // this will be set later
+  const byte *header_end= mlog_encode_varint(
+    header_buf.data(), payload_size << 2 | skip_bit << 1 | sequence_bit);
+  const size_t header_size= header_end - header_buf.data();
+
+  // Everything that does not need m_sequence_bit is assembled before
+  // taking the mutex.
+  mtr_functor_t accumulator;
+  auto &buf= accumulator.m_buf;
+  buf.reserve(header_size + payload.size() + /* crc32 */ 4);
+  buf.insert(buf.end(), header_buf.begin(), header_buf.begin() + header_size);
+  payload.for_each_block(accumulator);
+
+  // NOTE(review): the checksum covers the header as encoded with a zero
+  // sequence bit; the header is re-encoded below with the real bit, so a
+  // reader presumably has to clear that bit before verifying -- confirm.
+  const uint32_t crc32= ut_crc32(buf.data(), buf.size());
+  const size_t crc_offset= buf.size();
+  buf.resize(crc_offset + 4);
+  // Address the trailer through data(): dereferencing end() is undefined.
+  mach_write_to_4(buf.data() + crc_offset, crc32);
+
+  std::lock_guard<std::mutex> _(m_mutex);
+
+  // now with real sequence bit (which is mutex protected)
+  const byte *header_end2= mlog_encode_varint(
+    buf.data(), payload_size << 2 | skip_bit << 1 | m_sequence_bit);
+  (void) header_end2;
+  // The re-encoded header must occupy exactly the bytes reserved for it.
+  ut_ad(header_size == header_end2 - buf.data());
+  return append_wrapped(buf);
+}
+
+/** Append one checkpoint record at the tail of the main file and make it
+durable. On success the tail offset and the checkpoint number advance.
+@return DB_SUCCESS or the error from writing or flushing the main file */
+dberr_t redo_t::append_checkpoint()
+{
+  std::lock_guard<std::mutex> guard(m_mutex);
+
+  // Layout: 1 byte record type, 8 bytes holding m_checkpoint,
+  // 1 byte sequence bit.
+  std::array<byte, 1 + 8 + 1> record;
+  byte *const out= record.data();
+  out[0]= static_cast<byte>(record_type_t::CHECKPOINT);
+  mach_write_to_8(out + 1, m_checkpoint);
+  out[9]= m_sequence_bit;
+
+  dberr_t err= m_main_file.write(m_main_file_tail, record);
+  // An explicit flush is only needed when writes are not durable already.
+  if (!err && !m_main_file.writes_are_durable())
+    err= m_main_file.flush_data_only();
+  if (err)
+    return err;
+
+  m_main_file_tail+= record.size();
+  ++m_checkpoint;
+
+  return DB_SUCCESS;
+}
+
+/** Write buf to the data file at the current position, treating the file
+as a ring: whatever does not fit before the end of the file is written
+from offset 0. The sequence bit is flipped every time the position wraps
+to 0, whether the write straddles the end of the file or ends exactly on it.
+@param[in] buf bytes to append; must be shorter than the data file
+@return DB_SUCCESS or the error from writing the data file */
+dberr_t redo_t::append_wrapped(span<byte> buf)
+{
+  ut_ad(m_data_file_position != m_data_file_size);
+  ut_ad(buf.size() < m_data_file_size); // do not bite own tail!
+
+  if (m_data_file_position + buf.size() > m_data_file_size)
+  {
+    // Fill the file up to its end, then continue from the beginning.
+    const os_offset_t tail_length= m_data_file_size - m_data_file_position;
+    if (dberr_t err= m_data_file.write(m_data_file_position,
+                                       buf.subspan(0, tail_length)))
+    {
+      return err;
+    }
+    buf= buf.subspan(tail_length, buf.size() - tail_length);
+    m_data_file_position= 0;
+    flip_sequence_bit();
+  }
+
+  if (dberr_t err= m_data_file.write(m_data_file_position, buf))
+    return err;
+
+  m_data_file_position+= buf.size();
+  if (m_data_file_position == m_data_file_size)
+  {
+    // The write ended exactly at the end of the file. This is a wrap too,
+    // so the next record written at offset 0 must carry the flipped bit.
+    m_data_file_position= 0;
+    flip_sequence_bit();
+  }
+
+  return DB_SUCCESS;
+}
+
+/** Read buf.size() bytes of the data file starting at offset, treating the
+file as a ring: a read that runs past the end of the file continues from
+offset 0.
+@param[in] offset position in the data file where reading starts
+@param[out] buf destination; must be shorter than the data file
+@return DB_SUCCESS or the error from reading the data file */
+dberr_t redo_t::read_wrapped(os_offset_t offset, span<byte> buf)
+{
+  ut_ad(buf.size() < m_data_file_size); // do not bite own tail!
+
+  const bool crosses_end= offset + buf.size() > m_data_file_size;
+  if (crosses_end)
+  {
+    // First the part that lies before the end of the file ...
+    const os_offset_t until_end= m_data_file_size - offset;
+    const dberr_t head_err= m_data_file.read(offset,
+                                             buf.subspan(0, until_end));
+    if (head_err)
+      return head_err;
+
+    // ... then the remainder from the beginning of the file.
+    buf= buf.subspan(until_end, buf.size() - until_end);
+    offset= 0;
+  }
+
+  const dberr_t err= m_data_file.read(offset, buf);
+  return err ? err : DB_SUCCESS;
+}
+
+} // namespace redo
diff --git a/storage/innobase/log/log0recv.cc b/storage/innobase/log/log0recv.cc
index 2a9fa49c256..fc5dc46a506 100644
--- a/storage/innobase/log/log0recv.cc
+++ b/storage/innobase/log/log0recv.cc
@@ -747,7 +747,7 @@ dberr_t recv_sys_t::upgrade_file_format_to_10_5_if_needed()
ut_ad(!log_sys.log.data_is_opened());
- if (dberr_t err= create_data_file(srv_log_file_size))
+ if (dberr_t err= create_log_file(get_log_file_path(LOG_DATA_FILE_NAME).c_str(), srv_log_file_size))
return err;
// Copy one block from old file to new file.
diff --git a/storage/innobase/mtr/mtr0mtr.cc b/storage/innobase/mtr/mtr0mtr.cc
index 2e907d6b113..201a88dedcb 100644
--- a/storage/innobase/mtr/mtr0mtr.cc
+++ b/storage/innobase/mtr/mtr0mtr.cc
@@ -655,6 +655,8 @@ inline lsn_t mtr_t::finish_write(ulint len)
ut_ad(m_log.size() == len);
ut_ad(len > 0);
+ ut_a(redo::new_redo.append_mtr_data(m_log) == DB_SUCCESS);
+
lsn_t start_lsn;
if (m_log.is_small()) {
diff --git a/storage/innobase/srv/srv0start.cc b/storage/innobase/srv/srv0start.cc
index 2b08e0e68ae..37cca4f63e6 100644
--- a/storage/innobase/srv/srv0start.cc
+++ b/storage/innobase/srv/srv0start.cc
@@ -302,7 +302,13 @@ static dberr_t create_log_file(lsn_t lsn, std::string& logfile0)
DBUG_EXECUTE_IF("innodb_log_abort_8", return(DB_ERROR););
DBUG_PRINT("ib_log", ("After innodb_log_abort_8"));
- if (dberr_t err = create_data_file(srv_log_file_size)) {
+ if (dberr_t err = create_log_file(
+ get_log_file_path(LOG_DATA_FILE_NAME).c_str(),
+ srv_log_file_size)) {
+ return err;
+ }
+
+ if (dberr_t err = redo::redo_t::create_files(srv_log_file_size)) {
return err;
}
@@ -316,6 +322,7 @@ static dberr_t create_log_file(lsn_t lsn, std::string& logfile0)
}
log_sys.log.open_files(logfile0);
+ redo::new_redo.open_files();
fil_open_system_tablespace_files();
/* Create a log checkpoint. */
@@ -1440,6 +1447,7 @@ dberr_t srv_start(bool create_new_db)
srv_log_file_found = log_file_found;
log_sys.log.open_files(get_log_file_path());
+ redo::new_redo.open_files();
log_sys.log.create();
@@ -1682,6 +1690,7 @@ dberr_t srv_start(bool create_new_db)
err = fil_write_flushed_lsn(log_get_lsn());
ut_ad(!buf_pool_check_no_pending_io());
log_sys.log.close_files();
+ redo::new_redo.close_files();
if (err == DB_SUCCESS) {
bool trunc = srv_operation
== SRV_OPERATION_RESTORE;
@@ -2154,6 +2163,7 @@ void innodb_shutdown()
#endif /* BTR_CUR_HASH_ADAPT */
ibuf_close();
log_sys.close();
+ redo::new_redo.close_files();
purge_sys.close();
trx_sys.close();
if (buf_dblwr) {
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment