Last active
November 11, 2024 10:04
-
-
Save ThexXTURBOXx/08988a7c374e1e21ea8d7773b569bb0e to your computer and use it in GitHub Desktop.
Skip adf.ly links using the WaybackMachine semi-automagically!
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/*
 * Licensed under the MIT license.
 */
import com.google.gson.Gson;
import com.google.gson.GsonBuilder;
import java.io.IOException;
import java.io.InputStream;
import java.net.HttpURLConnection;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Base64;
import java.util.HashSet;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Locale;
import java.util.Scanner;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
public class AdflySkipUsingWaybackMachine { | |
public static void main(String[] args) throws Throwable { | |
// Example | |
System.out.println(skip("http://adf.ly/W6VYO")); | |
} | |
/** | |
* {@link Gson} parser instance to use. | |
*/ | |
private static final Gson GSON = new GsonBuilder().create(); | |
/** | |
* Pattern to find the {@code ysmm} to decode within adf.ly's HTML body. | |
*/ | |
private static final Pattern YSMM_PATTERN = Pattern.compile("^.*ysmm\\s*=\\s*['\"](.+?)['\"];.*$", | |
Pattern.MULTILINE | Pattern.DOTALL); | |
/** | |
* Returns possible decodings of the given adf.ly link using data from the WaybackMachine. | |
* Please keep in mind that the decoder can also produce broken results and the decoding process changed over time. | |
* But from my experience, the link (or at least something that is similar enough) is usually contained within | |
* the returned {@link Set}. | |
* | |
* @param adfly The adf.ly link | |
* @return Possible decodings of the given link | |
*/ | |
public static Set<String> skip(String adfly) throws IOException { | |
List<String> sns = getWaybackSnapshots(adfly); | |
if (sns.isEmpty()) | |
throw new IllegalStateException("Unavailable in WaybackMachine!"); | |
Set<String> ret = new HashSet<>(); | |
for (String sn : sns) { | |
String body = getWaybackBody(new URL(sn)); | |
if (body == null) continue; | |
String ysmm = searchYsmm(body); | |
if (ysmm == null) continue; | |
try { | |
ret.add(crackYsmm(ysmm)); | |
} catch (Throwable ignored) { | |
} | |
try { | |
ret.add(crackYsmmOld(ysmm)); | |
} catch (Throwable ignored) { | |
} | |
} | |
if (ret.isEmpty()) | |
throw new IllegalStateException("Cannot find ysmm!"); | |
return ret; | |
} | |
/** | |
* Returns all the snapshots of the given URL that are archived within the WaybackMachine. | |
* The returned list is sorted first by the HTTP status code (2XX is better than 3XX as the latter might indicate | |
* redirects to the "locked" adf.ly page) and then by date (older are better due to better chances of decoding | |
* their {@code ysmm}). | |
* | |
* @param url The URL to search snapshots for | |
* @return A list of snapshots of interests for the given URL | |
*/ | |
private static List<String> getWaybackSnapshots(String url) throws IOException { | |
List<String> urls = new ArrayList<>(); | |
List<String> urls300 = new ArrayList<>(); | |
HttpURLConnection conn = (HttpURLConnection) new URL( | |
"https://web.archive.org/__wb/sparkline?output=json&url=" + url + "&collection=web").openConnection(); | |
conn.setRequestProperty("Referer", "http://web.archive.org/web/20240000000000*/" + url + "?output=json"); | |
String yearsStr = readFully(conn.getInputStream()); | |
List<Integer> years = GSON.fromJson(yearsStr, Models.Years.class).years().keySet().stream() | |
.map(Integer::parseInt).sorted().toList(); | |
for (int year : years) { | |
conn = (HttpURLConnection) new URL( | |
"http://web.archive.org/__wb/calendarcaptures/2?url=" + url + "&date=" + year + "&groupby=day").openConnection(); | |
conn.setRequestProperty("Referer", "http://web.archive.org/web/20240000000000*/" + url + "?output=json"); | |
String daysStr = readFully(conn.getInputStream()); | |
int[][] days = GSON.fromJson(daysStr, Models.Days.class).items(); | |
for (int[] dayArr : days) { | |
String day = String.format("%04d", dayArr[0]); | |
conn = (HttpURLConnection) new URL( | |
"http://web.archive.org/__wb/calendarcaptures/2?url=" + url + "&date=" + year + day).openConnection(); | |
conn.setRequestProperty("Referer", "http://web.archive.org/web/" + year + day + "000000*/" + url); | |
String timesStr = readFully(conn.getInputStream()); | |
int[][] times = GSON.fromJson(timesStr, Models.Times.class).items(); | |
for (int[] timeArr : times) { | |
String time = String.format("%06d", timeArr[0]); | |
int status = timeArr[1]; | |
if (status / 100 == 2) // is 2XX status code? - OK | |
urls.add("http://web.archive.org/web/" + year + day + time + "/" + url); | |
else if (status / 100 == 3) // is 3XX status code? - redirects | |
urls300.add("http://web.archive.org/web/" + year + day + time + "/" + url); | |
// we don't care about other status codes... | |
} | |
} | |
} | |
urls.addAll(urls300); | |
return urls; | |
} | |
/** | |
* Returns the archived body of the given WaybackMachine URL. | |
* | |
* @param waybackURL The WaybackMachine URL | |
* @return The body of the archived page | |
*/ | |
private static String getWaybackBody(URL waybackURL) { | |
String wbURL = waybackURL.toString().replaceFirst("/http", "if_/http"); | |
try { | |
return readFully(new URL(wbURL).openStream()); | |
} catch (Throwable ignored) { | |
} | |
return null; | |
} | |
/** | |
* Searches and returns the {@code ysmm} within the given body. | |
* | |
* @param body The HTML body to search through | |
* @return The {@code ysmm} to decode | |
*/ | |
private static String searchYsmm(String body) { | |
Matcher m = YSMM_PATTERN.matcher(body); | |
if (m.matches()) { | |
return m.group(1); | |
} | |
return null; | |
} | |
/** | |
* My manual "old" approach to decode a given {@code ysmm}. | |
* | |
* @param ysmm The {@code ysmm} to decode | |
* @return The decoded URL | |
*/ | |
private static String crackYsmmOld(String ysmm) { | |
StringBuilder left = new StringBuilder(); | |
StringBuilder right = new StringBuilder(); | |
for (int i = 0; i < ysmm.length(); i += 2) { | |
if (i + 1 < ysmm.length()) { | |
left.append(ysmm.charAt(i)); | |
right.insert(0, ysmm.charAt(i + 1)); | |
} else { | |
// Handle case with single character remaining | |
left.append(ysmm.charAt(i)); | |
} | |
} | |
String decodedUri = new String(Base64.getDecoder().decode(left.toString() + right), StandardCharsets.UTF_8) | |
.substring(2); | |
Pattern pattern = Pattern.compile("go\\.php\\?u="); | |
Matcher matcher = pattern.matcher(decodedUri); | |
if (matcher.find()) { | |
decodedUri = new String(Base64.getDecoder().decode(matcher.replaceAll("")), StandardCharsets.UTF_8); | |
} | |
return decodedUri; | |
} | |
/** | |
* JDownloader's approach to decode a given {@code ysmm}. | |
* | |
* @param ysmm The {@code ysmm} to decode | |
* @return The decoded URL | |
* @see | |
* <a href="https://github.com/mirror/jdownloader/blob/master/src/jd/plugins/decrypter/AdfLy.java>JDownloader's GitHub repo</a> | |
*/ | |
private static String crackYsmm(String ysmm) { | |
String finallink; | |
StringBuilder C = new StringBuilder(); | |
StringBuilder h = new StringBuilder(); | |
for (int s = 0; s < ysmm.length(); ++s) { | |
if (s % 2 == 0) { | |
C.append(ysmm.charAt(s)); | |
} else { | |
h.insert(0, ysmm.charAt(s)); | |
} | |
} | |
// new 20170914 | |
final String[] a = (C + h.toString()).split(""); | |
/*for (int b = 0; b < a.length; ++b) { | |
if (a[b].matches("\\d")) { | |
for (int c = b + 1; c < a.length; ++c) { | |
if (a[c].matches("\\d")) { | |
final int d = Integer.parseInt(a[b]) ^ Integer.parseInt(a[c]); | |
if (d < 10) { | |
a[b] = d + ""; | |
} | |
b = c; | |
c = a.length; | |
} | |
} | |
} | |
}*/ | |
String sec = new String(Base64.getDecoder().decode(buildString(a, ""))); | |
// remove padding, I went with auto padding correction. | |
int pcount = sec.indexOf("http"); | |
if (pcount == -1) { | |
pcount = sec.indexOf("ftp"); | |
} | |
if (pcount > -1) { | |
// this works on the assumption that it's the same offset | |
finallink = sec.substring(pcount, sec.length() - pcount); | |
} else { | |
// At this time its 16 chars, prefix and postfix. | |
finallink = sec.substring(16, sec.length() - 16); | |
} | |
return finallink; | |
} | |
/** | |
* Helper method for JDownloader's {@code ysmm} decoder. | |
*/ | |
private static String buildString(final Object[] array, final String separator) { | |
if (array != null) { | |
final StringBuilder s = new StringBuilder(); | |
for (final Object elem : array) { | |
if (!s.isEmpty() && separator != null) { | |
s.append(separator); | |
} | |
if (elem instanceof String) { | |
final String value = ((String) elem).trim(); | |
s.append(value); | |
} else { | |
s.append(elem); | |
} | |
} | |
return s.toString(); | |
} else { | |
return null; | |
} | |
} | |
/** | |
* Reads the given {@link InputStream} fully and returns a {@link String} representing its content. | |
* | |
* @param stream The {@link InputStream} to read | |
* @return The read data represented as a {@link String} | |
*/ | |
private static String readFully(InputStream stream) { | |
try (Scanner s = new Scanner(stream).useDelimiter("\\A")) { | |
return s.hasNext() ? s.next() : ""; | |
} | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment