Skip to content

Instantly share code, notes, and snippets.

@qnkhuat
Created December 16, 2024 07:39
Show Gist options
  • Save qnkhuat/7b0dd630839643d56d854195fc9202f3 to your computer and use it in GitHub Desktop.
Save qnkhuat/7b0dd630839643d56d854195fc9202f3 to your computer and use it in GitHub Desktop.
Script to dry run alert migration for cloud instances
(ns dry-run-migrate-alert
(:require
[clj-http.client :as http]
[clojure.data.csv :as csv]
[honey.sql :as sql]
[metabase.db.custom-migrations.pulse-to-notification :as pulse-to-notification]
[metabase.util.date-2 :as u.date]
[metabase.util.json :as json]
[metabase.util.random :as u.random]
[toucan2.core :as t2]))
;; API key passed down from `process` and sent as the "x-api-key" request
;; header by `query-stats-for-sql`. "FILLME" is a placeholder: replace it
;; locally before running.
(def api-key "FILLME")
(defn query-stats-for-sql
  "Run the native query `sql` through the stats instance's CSV dataset endpoint,
  authenticating with `api-key` in the \"x-api-key\" header.

  Prints `sql` before sending. Returns the parsed CSV rows (each a sequence of
  strings) with the first row dropped -- presumably the header line; or nil
  when nothing follows it."
  [api-key sql]
  (prn sql)
  (let [payload   (json/encode
                   {:database   45 ;; hosting insight prod
                    :type       "native"
                    :native     {:query sql}
                    :middleware {:js-int-to-string?                 true
                                 :add-default-userland-constraints? true}})
        ;; The endpoint is called with a form-encoded body: query=<url-encoded JSON>.
        form-body (str "query=" (java.net.URLEncoder/encode payload "UTF-8"))
        request   {:body    form-body
                   :headers {"User-Agent"      "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:122.0) Gecko/20100101 Firefox/122.0"
                             "Accept"          "*/*"
                             "Accept-Language" "en-US,en;q=0.5"
                             "Content-Type"    "application/x-www-form-urlencoded;charset=UTF-8"
                             "Origin"          "https://stats.metabase.com"
                             "Connection"      "keep-alive"
                             "x-api-key"       api-key}}]
    (-> (http/post "https://stats.metabase.com/api/dataset/csv?format_rows=false" request)
        :body
        csv/read-csv
        next)))
(defn return-if-non-nil
  "Returns a one-argument function that yields `x` when its argument is truthy
  and nil otherwise (so both nil and false map to nil)."
  [x]
  (fn [y]
    (when y
      x)))
(defn maybe-do
  "Wraps `f` so that it is only applied to non-empty input.

  The returned function yields nil for nil or an empty value (e.g. \"\"), and
  otherwise returns `(f x)`."
  [f]
  (fn [x]
    ;; `not-empty` returns nil for both nil and empty collections/strings.
    (when-some [value (not-empty x)]
      (f value))))
(def tables
  "Extraction spec per table, keyed by table name.

  Each value is a map with:
    :queries   - fn of the instance id returning a vector of HoneySQL query maps
    :transform - map of column keyword -> fn applied to the raw cell value of
                 that column; columns without an entry are passed through as-is
                 by `query-stats`.

  Cell values come from a parsed CSV, so they are always strings and a blank
  cell is \"\" rather than nil. Transforms that supply a default or should
  produce nil therefore test with `not-empty` instead of relying on nil."
  {"core_user"               {:queries   (fn [id]
                                           [{:select [:id :email :date_joined]
                                             :where  [:= :etl_source_instance_id [:inline id]]
                                             :from   [:core_user]}])
                              :transform {:id          parse-long
                                          ;; replace the real address with a random one
                                          :email       (fn [& _args] (u.random/random-email))
                                          :date_joined (maybe-do u.date/parse)}}
   "report_card"             {:queries   (fn [id]
                                           [{:select [[:report_card.id :id] :name :description :creator_id :database_id :collection_id :visualization_settings :display :dataset_query :created_at :updated_at]
                                             :from   [:report_card]
                                             :where  [:and
                                                      [:= :report_card.etl_source_instance_id [:inline id]]
                                                      ;; only cards referenced by a pulse_card of the same instance
                                                      [:exists {:select [:id]
                                                                :from   [:pulse_card]
                                                                :where  [:and [:= :card_id :report_card.id]
                                                                         [:= :etl_source_instance_id :report_card.etl_source_instance_id]]}]]}])
                              :transform {:id                     parse-long
                                          :database_id            (constantly 1)
                                          ;; blank cell is "" (truthy), so `or` alone would never apply the default
                                          :visualization_settings #(or (not-empty %) "{}")
                                          :creator_id             (maybe-do parse-long)
                                          :created_at             (maybe-do u.date/parse)
                                          :updated_at             (maybe-do u.date/parse)
                                          :collection_id          (constantly 1)}}
   "report_dashboard"        {:queries   (fn [id]
                                           [{:select [:id :name :description :collection_id :created_at :updated_at :creator_id :parameters]
                                             :from   [:report_dashboard]
                                             :where  [:and [:= :etl_source_instance_id [:inline id]]
                                                      ;; only dashboards referenced by a pulse of the same instance
                                                      [:exists {:select [:id]
                                                                :from   [:pulse]
                                                                :where  [:and [:= :dashboard_id :report_dashboard.id]
                                                                         [:= :etl_source_instance_id :report_dashboard.etl_source_instance_id]]}]]}])
                              :transform {:id            parse-long
                                          :created_at    u.date/parse
                                          :updated_at    u.date/parse
                                          :parameters    (constantly "{}")
                                          :creator_id    parse-long
                                          :collection_id (constantly 1)}}
   "report_dashboardcard"    {:queries   (fn [id]
                                           [{:select [:id :size_x :size_y :row :col :parameter_mappings :visualization_settings :card_id :dashboard_id :created_at :updated_at]
                                             :where  [:and
                                                      [:= :etl_source_instance_id [:inline id]]
                                                      ;; only dashboard cards referenced by a pulse_card of the same instance
                                                      [:exists {:select [:id]
                                                                :from   [:pulse_card]
                                                                :where  [:and [:= :dashboard_card_id :report_dashboardcard.id]
                                                                         [:= :etl_source_instance_id :report_dashboardcard.etl_source_instance_id]]}]]
                                             :from   [:report_dashboardcard]}])
                              :transform {:id                     parse-long
                                          :created_at             u.date/parse
                                          :updated_at             u.date/parse
                                          :size_x                 parse-long
                                          :size_y                 parse-long
                                          :row                    parse-long
                                          :col                    parse-long
                                          ;; blank cell is "" (truthy), so `or` alone would never apply the default
                                          :visualization_settings #(or (not-empty %) "{}")
                                          :card_id                (maybe-do parse-long)
                                          :dashboard_id           (maybe-do parse-long)}}
   "pulse"                   {:queries   (fn [id]
                                           [{:select [:id :name :creator_id :alert_condition :alert_first_only :archived :parameters :created_at :updated_at]
                                             :from   [:pulse]
                                             :where  [:= :etl_source_instance_id [:inline id]]}])
                              :transform {:id               parse-long
                                          :creator_id       parse-long
                                          ;; a blank cell must become nil, not "", so rows without an
                                          ;; alert condition are not stored with an empty-string one
                                          :alert_condition  not-empty
                                          :alert_first_only parse-boolean
                                          :archived         parse-boolean
                                          :created_at       u.date/parse
                                          :updated_at       u.date/parse}}
   "pulse_card"              {:queries   (fn [id]
                                           [{:select [:id :pulse_id :card_id :position :include_csv :include_xls :dashboard_card_id]
                                             :from   [:pulse_card]
                                             :where  [:and [:= :etl_source_instance_id [:inline id]]]}])
                              :transform {:id                parse-long
                                          :pulse_id          parse-long
                                          :card_id           parse-long
                                          :position          parse-long
                                          :include_csv       parse-boolean
                                          :include_xls       parse-boolean
                                          :dashboard_card_id parse-long}}
   "pulse_channel_recipient" {:queries   (fn [id]
                                           [{:select [:id :pulse_channel_id :user_id]
                                             :from   [:pulse_channel_recipient]
                                             :where  [:= :etl_source_instance_id [:inline id]]}])
                              :transform {:id               parse-long
                                          :pulse_channel_id parse-long
                                          :user_id          parse-long}}
   "pulse_channel"           {:queries   (fn [id]
                                           [{:select [:id :pulse_id :channel_type :details :schedule_type :schedule_hour :schedule_day :schedule_frame :enabled #_:channel_id :created_at :updated_at]
                                             :from   [:pulse_channel]
                                             :where  [:= :etl_source_instance_id [:inline id]]}])
                              :transform {:id            parse-long
                                          :pulse_id      parse-long
                                          :enabled       parse-boolean
                                          ;; parse-long yields nil for a blank cell
                                          :schedule_hour parse-long
                                          ;; only takes effect if :channel_id is re-enabled in the select above
                                          :channel_id    (return-if-non-nil 1)
                                          ;; sadly this is encrypted, so it is replaced with an empty JSON object
                                          :details       (constantly "{}")
                                          :created_at    u.date/parse
                                          :updated_at    u.date/parse}}})
(defn sqls-for-table
  "Returns a lazy sequence of SQL strings, one per query that the `tables` entry
  for `table-name` defines for `instance-id`.

  `sql/format` returns a vector of the SQL string followed by any bound
  parameter values. Only the SQL string is kept, and values are inlined, so a
  query that introduces a non-inlined value can never leak its parameters into
  the result as if they were statements."
  [table-name instance-id]
  (let [queries ((get-in tables [table-name :queries]) instance-id)]
    (map (fn [query]
           (first (sql/format query {:inline true})))
         queries)))
(defn query-stats
  "Fetches the rows of `table` for `instance-id` and returns them as a lazy
  sequence of maps keyed by column keyword.

  Column names are taken from the :select of the table's first query (an
  aliased `[expr alias]` entry contributes its alias). Each cell is passed
  through the matching fn in the table's :transform map, or left unchanged
  when there is none."
  [api-key table instance-id]
  (prn :QUERY_STATS table)
  (let [transforms (get-in tables [table :transform])
        column-key (fn [selected]
                     (if (sequential? selected)
                       (last selected)
                       selected))
        ;; The instance id does not affect the column list, so a dummy id is used.
        columns    (->> ((get-in tables [table :queries]) "123")
                        first
                        :select
                        (map column-key))
        coerce-row (fn [row]
                     (into {}
                           (map (fn [[column cell]]
                                  [column ((get transforms column identity) cell)]))
                           (zipmap columns row)))]
    (->> (sqls-for-table table instance-id)
         (mapcat #(query-stats-for-sql api-key %))
         (map coerce-row))))
(defn load-data!
  "Replaces the contents of the local tables involved in the migration with the
  rows fetched for `instance-id`.

  First deletes everything from each listed table, then, table by table,
  fetches the rows via `query-stats`, prints how many were found and inserts
  them in batches of at most 5000."
  [api-key instance-id]
  (let [table-names ["core_user"
                     "report_card"
                     "report_dashboard"
                     "report_dashboardcard"
                     "pulse"
                     "pulse_card"
                     "pulse_channel"
                     "pulse_channel_recipient"]]
    (doseq [table-name table-names]
      (t2/delete! table-name))
    (doseq [table-name table-names]
      (let [rows (query-stats api-key table-name instance-id)]
        (println (format "Loading %d rows into %s" (count rows) table-name))
        (doseq [batch (partition-all 5000 rows)]
          (t2/insert! table-name batch))))))
(defn check
  "Sanity check run after the migration: asserts that the number of
  :model/Notification rows equals the number of :model/Pulse rows that are not
  archived and whose alert_condition is \"rows\" or \"goal\".

  Throws an AssertionError on mismatch (assertions must be enabled)."
  []
  (assert (= (t2/count :model/Notification)
             (t2/count :model/Pulse :archived false :alert_condition [:in ["rows" "goal"]]))
          "Notification count does not match Pulse count"))
(defn process
  "Dry-runs the alert migration for one instance. `id` is a cloud instance id.

  Steps, in order: delete all existing notifications, reload the local tables
  from the stats instance using the top-level `api-key`, run
  `pulse-to-notification/migrate-alerts!`, then verify the result with `check`.
  Prints progress to stdout."
  [id]
  (println "Processing" id)
  ;; start from a clean slate so `check` only counts notifications created by this run
  (t2/delete! :model/Notification)
  (load-data! api-key id)
  (pulse-to-notification/migrate-alerts!)
  (check)
  (println "DONE"))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment