Last active
December 21, 2022 16:05
-
-
Save novaugust/e13b86e8f39d693b72069ff149f0acef to your computer and use it in GitHub Desktop.
instagram to ghost scripts
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Mix.install [:floki, :jason] | |
defmodule InstaDumpParser do
  @moduledoc """
  Turns an Instagram HTML data dump into plain maps (one per post) that can be
  serialized and later imported into Ghost.
  """

  @doc """
  Splits an Instagram media URL into its year, month and filename parts.

  Ghost uploads keep the original filename, e.g.
  https://blog.novaugust.net/content/images/2022/12/10616813_1695162544038552_1763391856_n_17841611368072760.jpg

  Returns a map with `:year`, `:month` and `:filename` string values. Raises
  `MatchError` when `url` contains no `media/posts/YYYYMM/...` segment.
  """
  @spec parse_url(String.t()) :: %{year: String.t(), month: String.t(), filename: String.t()}
  def parse_url(url) do
    captures = Regex.run(~r|media/posts/(\d\d\d\d)(\d\d)/(.*)|, url, capture: :all_but_first)
    # Regex.run/3 returns nil on no match, so this match doubles as the assertion.
    [year, month, filename] = captures
    %{year: year, month: month, filename: filename}
  end

  @doc """
  Counts how often each hashtag occurs in the text of the document's `body`.

  A hashtag is a `#` followed by one or more non-whitespace characters; the
  returned map is keyed by the tag without its leading `#`.
  """
  def tag_frequencies(document) do
    body_text =
      document
      |> Floki.find("body")
      |> Floki.text(sep: "\n")

    ~r/\#(\S+)/
    |> Regex.scan(body_text, capture: :all_but_first)
    |> List.flatten()
    |> Enum.frequencies()
  end

  @doc """
  Reads the two `._2piu>div` cells of a coordinates table.

  Returns `%{lat: text, long: text}` with the cells' text in document order.
  Raises `MatchError` unless exactly two cells are found.
  """
  def parse_coords(table) do
    [lat, long] =
      table
      |> Floki.find("._2piu>div")
      |> Enum.map(&Floki.text/1)

    %{lat: lat, long: long}
  end

  @doc """
  Extracts the tagged people from a table as a list of trimmed, non-empty names.

  The `._2piu` text is split on the ` (Tagged, 0.00, 0.00)` suffix (optionally
  followed by a comma) that the dump appends to each name.
  """
  def parse_people(table) do
    table
    |> Floki.find("._2piu")
    |> Floki.text()
    |> String.split(~r/ \(Tagged, 0.00, 0.00\),?/)
    |> Enum.map(&String.trim/1)
    |> Enum.reject(&(&1 == ""))
  end

  @doc """
  Parses the dump file at `path` and returns one map per `.pam` element.

  Each map has the keys `:imgs`, `:coords`, `:ppl`, `:txt` and `:timestamp`.
  Raises if the file cannot be read or the HTML cannot be parsed.
  """
  def parse(path) do
    html = File.read!(path)
    {:ok, document} = Floki.parse_document(html)

    document
    |> Floki.find(".pam")
    |> Enum.map(&parse_post/1)
  end

  # Builds the result map for a single post node.
  defp parse_post(post) do
    image_sources = post |> Floki.find("img") |> Floki.attribute("src")
    {coords, ppl} = coords_and_people(post)
    caption = post |> Floki.find("._2pim") |> Floki.text()
    posted_at = post |> Floki.find("._3-94") |> Floki.text()

    %{imgs: image_sources, coords: coords, ppl: ppl, txt: caption, timestamp: posted_at}
  end

  # A post carries zero, one or two tables. With two, the first is treated as
  # coordinates and the second as people; a lone table is classified by
  # whether its text mentions "Latitude".
  defp coords_and_people(post) do
    case Floki.find(post, "table") do
      [coordinates, people] ->
        {parse_coords(coordinates), parse_people(people)}

      [coords_or_people] ->
        case String.contains?(Floki.text(coords_or_people), "Latitude") do
          true -> {parse_coords(coords_or_people), nil}
          false -> {nil, parse_people(coords_or_people)}
        end

      [] ->
        {nil, nil}
    end
  end
end
# Parse the first page of the Instagram dump and store the posts as JSON.
posts = InstaDumpParser.parse("./content/posts_1.html")
json = Jason.encode!(posts)
File.write!("posts.json", json)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Mix.install [:jason, :mobiledoc] | |
defmodule MobileDoc.Card.Markdown do
  @moduledoc """
  Card definition for mobiledoc "markdown" cards.
  """

  defstruct name: "markdown"

  defmodule Html do
    @moduledoc """
    HTML-side handler for the markdown card.
    """

    @doc """
    Appends the card's `"markdown"` payload to the end of `buffer`, unchanged.

    The text is passed through as-is; no Markdown-to-HTML conversion happens
    here. Raises `FunctionClauseError` when the payload has no `"markdown"` key.
    """
    # NOTE(review): presumably called by the mobiledoc renderer once per card —
    # confirm the expected callback contract against the library.
    def setup(buffer, _options, _env, %{"markdown" => source}) do
      buffer ++ [source]
    end
  end
end
defmodule MobileDoc.Card.Image do
  @moduledoc """
  Card definition for mobiledoc "image" cards.
  """

  defstruct name: "image"

  defmodule Html do
    @moduledoc """
    HTML-side handler for the image card.
    """

    @doc """
    Appends the HTML for an image card to the end of `buffer`.

    The payload must contain `"src"`; `"alt"` and `"caption"` are optional.
    Without a caption (missing, `nil`, `false` or `""`) a bare `<img>` tag is
    emitted. With a caption the image is wrapped in a complete
    `<figure>...</figure>` element that also holds a `<figcaption>`.

    Raises `FunctionClauseError` when the payload has no `"src"` key.
    """
    # NOTE(review): src, alt and caption are interpolated without HTML
    # escaping. That is fine for a trusted export, but escape them before
    # feeding this untrusted input.
    def setup(buffer, _options, _env, %{"src" => src} = card) do
      img = ~s|<img src="#{src}" alt="#{card["alt"]}">|

      html =
        case card["caption"] do
          # An empty string is truthy in Elixir, so it is listed explicitly to
          # avoid emitting a figure with an empty figcaption.
          blank when blank in [nil, false, ""] ->
            img

          caption ->
            # The closing </figure> was previously missing, leaving the element open.
            "<figure>#{img}<figcaption>#{caption}</figcaption></figure>"
        end

      buffer ++ [html]
    end
  end
end
alias MobileDoc.Renderer_0_3, as: MD

# Load the Ghost export and render the mobiledoc of its last post to stdout.
export = File.read!("novaugust.ghost.2022-12-20-12-46-53.json")
%{"db" => [%{"data" => data}]} = Jason.decode!(export)
post = List.last(data["posts"])
# The mobiledoc is stored as a JSON string inside the export, so decode it again.
md = Jason.decode!(post["mobiledoc"])
# NOTE(review): only the markdown card is registered here; MobileDoc.Card.Image
# is defined but never passed to the renderer — confirm that is intended.
rendered = MD.render(md, %{"markdown" => MobileDoc.Card.Markdown})
IO.puts(rendered)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment