Skip to content

Instantly share code, notes, and snippets.

@antiops
Created November 1, 2020 04:16
Show Gist options
  • Select an option

  • Save antiops/2d6fcf7a78111e09aefad45eeaaa5e21 to your computer and use it in GitHub Desktop.

Select an option

Save antiops/2d6fcf7a78111e09aefad45eeaaa5e21 to your computer and use it in GitHub Desktop.
pixieset.com dumper
/* pixieset.com full size image scraper
*
* Rips all images in highest quality
*
* To use scroll to the very bottom of the album and press F12 then paste the below snippet in and press enter.
* The full file list will be copied to your clipboard afterwards. You can use jdownloader to easily download the full list.
*/
(() => {
// Collect every gallery thumbnail and rewrite its URL to the largest variant.
const images = document.querySelectorAll('.masonry-brick img')
const urls = Array.from(images, (img) => img.src.replace(/-medium/g, '-xxlarge'))
console.log(urls)
// Fix: the original did `arr.toString().split(',').map(trim)`, a no-op
// round-trip through a comma-joined string that would also corrupt any URL
// containing a comma. Join the array directly instead.
//
// Copy one URL per line via a temporary textarea; document.execCommand is
// deprecated but remains the most reliable clipboard path when pasted into
// the devtools console, where navigator.clipboard may be blocked.
const dummy = document.createElement('textarea')
document.body.appendChild(dummy)
dummy.value = urls.join('\n')
dummy.select()
document.execCommand('copy')
document.body.removeChild(dummy)
})()
@DeflateAwning
Copy link
Copy Markdown

Perform final download with:

wget -i "url_list_pasted.txt" -P "output_folder/"

@CyberAstronaut101
Copy link
Copy Markdown

Thank you for this!

@smejky
Copy link
Copy Markdown

smejky commented Sep 28, 2022

before performing wget, replace "xlarge" with "xxlarge" in the list of urls and you will get 1600px photos instead of 1024px photos.

@DeflateAwning
Copy link
Copy Markdown

before performing wget, replace "xlarge" with "xxlarge" in the list of urls and you will get 1600px photos instead of 1024px photos.

My fork does that automatically: https://gist.github.com/DeflateAwning/8567037cc7125cb3ede76fed40d27ba1

@brozikcz
Copy link
Copy Markdown

Thank you!

@m1ndy
Copy link
Copy Markdown

m1ndy commented Nov 18, 2024

For Firefox, and with xxlarge (~ 1600x2400) it's simpler:

  1. Scroll to bottom of Pixieset page to see all photos loaded.
  2. open Console (F12)
  3. Paste:
(() => {
  // Gather every candidate image URL (rendered src, or lazy-load data-src
  // when the element has not been hydrated yet).
  const urls = [];
  for (const img of document.querySelectorAll('img')) {
    const src = img.src || img.getAttribute('data-src');
    // Keep only Pixieset-hosted images; upgrade any size suffix to the
    // largest available variant.
    if (src && src.includes('images.pixieset.com')) {
      urls.push(src.replace(/-medium|-large|-xlarge/g, '-xxlarge'));
    }
  }

  // Print one URL per line so the list can be copied straight from the console.
  if (urls.length > 0) {
    console.log(urls.join('\n'));
  } else {
    console.warn('No matching images found.');
  }
})();
  4. the URLs should be printed out now. Right click on that list and 'Copy Object'.
  5. Paste into a new text file and save it as url_list_pasted.txt
  6. Follow @DeflateAwning's wget -i "url_list_pasted.txt" -P "output_folder/".
  7. All the images should download serially (couldn't figure out parallelism) in the folder

@younesdev123
Copy link
Copy Markdown

🧭 How to use

  1. Create a file urls.json and put your image URLs inside:

    ["https://example.com/image1.jpg", "https://example.com/image2.png"]
  2. Install dependencies

    npm init -y && npm i axios archiver cli-progress mime-types
  3. Run the script

    node yourfile.js urls.json

💾 The images will be downloaded with progress tracking, and a images.zip file will be created inside the output folder.

// Node.js batch image downloader: reads a JSON array of URLs, downloads each
// file with CLI progress bars, then zips the results into a single archive.
const fs = require("fs");
const fsp = fs.promises;
const path = require("path");
const axios = require("axios");
const Archiver = require("archiver");
const mime = require("mime-types");
const cliProgress = require("cli-progress");


// All artifacts live under ./output next to this script: raw downloads in a
// subfolder, plus the final zip archive.
const OUTPUT_DIR = path.resolve(__dirname, "output");
const DOWNLOAD_DIR = path.join(OUTPUT_DIR, "downloads");
const ZIP_PATH = path.join(OUTPUT_DIR, "images.zip");

// Demo URLs used when no JSON file is passed on the command line.
const FALLBACK_URLS = [
  "https://upload.wikimedia.org/wikipedia/commons/thumb/3/3f/Fronalpstock_2.jpg/640px-Fronalpstock_2.jpg",
  "https://upload.wikimedia.org/wikipedia/commons/thumb/f/f9/Phoenicopterus_ruber_in_S%C3%A3o_Paulo_Zoo.jpg/640px-Phoenicopterus_ruber_in_S%C3%A3o_Paulo_Zoo.jpg"
];


async function readUrlsFromArg() {
  // CLI usage: `node script.js urls.json`. Without an argument, fall back to
  // the built-in demo URLs. Throws if the file is not a JSON array.
  const fileArg = process.argv[2];
  if (!fileArg) return FALLBACK_URLS;

  const filePath = path.resolve(process.cwd(), fileArg);
  const parsed = JSON.parse(await fsp.readFile(filePath, "utf8"));
  if (!Array.isArray(parsed)) throw new Error("Le fichier JSON doit contenir un tableau d'URLs.");
  return parsed;
}


function buildFileName(url, index, contentType) {
  // Derive a filename from the URL's path component, falling back to a
  // positional name ("image_1", "image_2", ...) when the path is empty.
  const { pathname } = new URL(url);
  let name = path.basename(pathname);
  if (!name || name === "/") name = `image_${index + 1}`;

  // If the URL carried no extension, infer one from the Content-Type header
  // ("bin" when the type is unknown or missing).
  if (!path.extname(name)) {
    name = `${name}.${mime.extension(contentType || "") || "bin"}`;
  }

  // Strip anything outside a conservative filename-safe character set.
  return name.replace(/[^a-zA-Z0-9._-]/g, "_");
}

async function ensureUnique(destDir, fileName) {
  // Probe destDir for collisions, appending "(1)", "(2)", ... before the
  // extension until an unused name is found, and return the full path.
  // NOTE(review): check-then-use is not atomic, so a concurrent writer could
  // still race this — same as the original behavior.
  const { name: stem, ext } = path.parse(fileName);
  let candidate = fileName;
  for (let attempt = 1; ; attempt++) {
    const fullPath = path.join(destDir, candidate);
    try {
      await fsp.access(fullPath);
      // access() succeeded, so the file exists — try the next numbered variant.
      candidate = `${stem}(${attempt})${ext}`;
    } catch {
      // access() threw, meaning the path is free.
      return fullPath;
    }
  }
}

async function downloadWithProgress(url, destPath, bars) {
  // Stream `url` to `destPath`, updating the per-file bar (when the server
  // reports a Content-Length) and the shared total bar on every chunk.
  // Resolves with { bytes, contentType }; rejects on network OR disk errors.
  const response = await axios.get(url, { responseType: "stream", validateStatus: s => s >= 200 && s < 400 });
  const total = Number(response.headers["content-length"]) || 0;
  const type = response.headers["content-type"];

  const outStream = fs.createWriteStream(destPath);

  return new Promise((resolve, reject) => {
    let downloaded = 0;
    if (total > 0) bars.file.start(total, 0);

    response.data.on("data", (chunk) => {
      downloaded += chunk.length;
      if (total > 0) bars.file.update(downloaded);
      bars.total.increment(chunk.length);
    });

    response.data.on("error", reject);
    // Fix: write-stream failures (ENOSPC, permissions, ...) previously had no
    // handler, which crashes the process and leaves this promise pending.
    outStream.on("error", reject);

    outStream.on("finish", () => {
      if (total > 0) bars.file.update(total);
      resolve({ bytes: downloaded, contentType: type });
    });

    response.data.pipe(outStream);
  });
}

async function makeZip(fromDir, toZipPath) {
  // Zip the entire contents of `fromDir` (without a top-level folder entry)
  // into `toZipPath`, creating the destination directory if needed.
  await fsp.mkdir(path.dirname(toZipPath), { recursive: true });
  await new Promise((resolve, reject) => {
    const output = fs.createWriteStream(toZipPath);
    const archive = Archiver("zip", { zlib: { level: 9 } });

    output.on("close", resolve);
    // Fix: errors on the output stream (e.g. disk full) previously had no
    // handler, leaving this promise pending forever.
    output.on("error", reject);
    archive.on("error", reject);

    archive.pipe(output);
    archive.directory(fromDir, false);
    archive.finalize();
  });
}

async function main() {
  // Pipeline: read URL list → one HEAD pass (total size + content types) →
  // download each file with progress bars → zip the folder → print a summary.
  const urls = await readUrlsFromArg();
  if (urls.length === 0) throw new Error("Aucune URL fournie.");

  await fsp.mkdir(DOWNLOAD_DIR, { recursive: true });

  const multibar = new cliProgress.MultiBar({
    clearOnComplete: false,
    hideCursor: true,
    format: "{name} {bar} {percentage}% | {value}/{total}"
  }, cliProgress.Presets.shades_classic);

  // Total bar starts at a placeholder size; resized below if the servers
  // report Content-Length, otherwise it simply grows past its total.
  const totalBar = multibar.create(1, 0, { name: "Total" });

  // Single HEAD pass: sum Content-Lengths for the total bar AND remember each
  // response's Content-Type. (Fix: the original issued a second HEAD request
  // per URL inside the download loop just to re-fetch the content type, and
  // wrapped already-caught promises in a redundant Promise.allSettled.)
  const headResponses = await Promise.all(urls.map(u => axios.head(u).catch(() => null)));
  let sumLengths = 0;
  for (const res of headResponses) {
    const len = Number(res?.headers["content-length"]) || 0;
    if (len > 0) sumLengths += len;
  }
  if (sumLengths > 0) totalBar.setTotal(sumLengths);

  const results = [];

  for (let i = 0; i < urls.length; i++) {
    const url = urls[i];
    const contentType = headResponses[i]?.headers["content-type"];

    const baseName = buildFileName(url, i, contentType);
    const destPath = await ensureUnique(DOWNLOAD_DIR, baseName);

    const fileBar = multibar.create(1, 0, { name: path.basename(destPath) });

    try {
      const { bytes } = await downloadWithProgress(url, destPath, { file: fileBar, total: totalBar });
      results.push({ url, path: destPath, bytes, ok: true });
    } catch (err) {
      results.push({ url, path: destPath, bytes: 0, ok: false, error: String(err.message || err) });
      console.error(`\nErreur sur ${url}:`, err.message || err);
    } finally {
      // Always stop the per-file bar exactly once, success or failure.
      // (Fix: the original also called fileBar.stop() inside the catch,
      // duplicating the finally clause.)
      fileBar.stop();
    }
  }

  multibar.stop();

  // Package everything that was downloaded.
  console.log("\nCréation du ZIP...");
  await makeZip(DOWNLOAD_DIR, ZIP_PATH);
  console.log(`ZIP créé: ${ZIP_PATH}`);

  // Summary: successes vs failures.
  const ok = results.filter(r => r.ok).length;
  const ko = results.length - ok;
  console.log(`\nTéléchargements réussis: ${ok}/${results.length}`);
  if (ko) {
    console.log("Échecs:");
    results.filter(r => !r.ok).forEach(r => console.log(" - ", r.url));
  }
}

// Entry point: surface any unhandled failure and exit non-zero so shell
// pipelines can detect the error.
main().catch(err => {
  console.error("\nÉchec:", err);
  process.exit(1);
});

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment