Last active
February 12, 2017 14:49
-
-
Save phannam1412/841db22a2c846d21cd0eda831be0a69d to your computer and use it in GitHub Desktop.
download manga
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
include 'vendor/autoload.php'; | |
use Dompdf\Dompdf; | |
// Base URL of the scraped site. Chapter hrefs found on the listing page are
// appended to this value to build absolute chapter links.
$DOMAIN = 'http://manga-scan.com';
/**
 * Bundle the downloaded chapter images of a manga into PDF files.
 *
 * Chapter folders are expected at download/<manga_name>/0000, 0001, ... and
 * are scanned in order until the first missing folder. Images are collected
 * into an HTML document; once more than 200 images have been gathered the
 * batch is rendered to download/<manga_name>/pdf/<manga_name>_chap_<from>-<to>.pdf.
 * A PDF that already exists on disk is never regenerated.
 *
 * @param string $manga_name Folder name of the manga below "download/".
 * @return void
 * @throws Exception If the pdf output directory cannot be created.
 */
function printPdf($manga_name) {
    date_default_timezone_set('Asia/Ho_Chi_Minh');

    $manga_dir = 'download/' . $manga_name;
    $pdf_dir = $manga_dir . '/pdf';
    createDir($pdf_dir);

    $html = '';
    $start = 0;  // first chapter index contained in the current batch
    $total = 0;  // number of images collected for the current batch

    // Scan each folder for images. Each folder corresponds to a chapter.
    for ($a = 0; $a < 10000; $a++) {
        $dir = $manga_dir . '/' . sprintf('%04d', $a);

        // A missing chapter folder means the last chapter has been processed.
        if (!is_dir($dir)) {
            break;
        }

        $names = scandir($dir);
        if ($names === false) {
            // Unreadable folder: treat it as a chapter without images.
            $names = [];
        }

        // All images of this chapter are added to the html for printing.
        foreach ($names as $name) {
            $file = $dir . '/' . $name;
            if (!is_file($file)) {
                continue;
            }
            $html .= '<img src="' . htmlspecialchars($file, ENT_QUOTES, 'UTF-8') . '">';
            // Count real image files only, not "." / ".." or sub-directories.
            $total++;
        }

        // Each pdf file must not contain too many images; ~200 pages is enough.
        if ($total > 200) {
            $pdf_file = $pdf_dir . '/' . $manga_name . '_chap_' . $start . '-' . $a . '.pdf';
            writePdfIfMissing($pdf_file, $html);

            // Start a fresh batch whether the pdf was written or skipped.
            // The counter must be reset in both cases, otherwise a skipped
            // batch would shift every following batch boundary on re-runs.
            $html = '';
            $start = $a + 1;
            $total = 0;
        }
    }

    // Flush the remaining chapters that did not fill a whole batch.
    if ($html !== '') {
        $pdf_file = $pdf_dir . '/' . $manga_name . '_chap_' . $start . '-' . ($a - 1) . '.pdf';
        writePdfIfMissing($pdf_file, $html);
    }
}

/**
 * Render an HTML document to a PDF file on disk unless the file already exists.
 *
 * @param string $pdf_file Destination path of the PDF.
 * @param string $html     HTML markup to render.
 * @return void
 */
function writePdfIfMissing($pdf_file, $html) {
    // Does this pdf file already exist? Skip it.
    if (file_exists($pdf_file)) {
        return;
    }

    print 'Saving pdf ' . $pdf_file . PHP_EOL;

    // Instantiate dompdf, render the HTML and store the result on disk.
    $dompdf = new Dompdf();
    $dompdf->loadHtml($html);
    $dompdf->render();
    file_put_contents($pdf_file, $dompdf->output());
}
/**
 * Make sure a directory exists, creating it with mode 0777 when necessary.
 *
 * Missing parent directories are created as well. An already existing
 * directory is left untouched (its mode is not changed).
 *
 * @param string $dir Path of the directory.
 * @return void
 * @throws Exception If the directory cannot be created (for example because a
 *                   regular file occupies the path) or its mode cannot be set.
 */
function createDir($dir) {
    // Only a real directory counts as "already there".
    if (is_dir($dir)) {
        return;
    }

    // Recursive, so nested targets such as download/<manga>/pdf work even when
    // the parents are missing. is_dir() is re-checked because another process
    // may have created the directory in the meantime.
    if (!mkdir($dir, 0777, true) && !is_dir($dir)) {
        throw new Exception("Cannot create '$dir' directory");
    }

    // mkdir() is subject to the umask, so the mode is enforced explicitly.
    if (!chmod($dir, 0777)) {
        throw new Exception("Cannot set mod 777 for '$dir' directory");
    }
}
/**
 * Script entry point: download every chapter of the manga whose listing page
 * is given as the first command line argument, then bundle the images to PDF.
 *
 * Reads the globals $argv (command line) and $DOMAIN (site base URL).
 *
 * @return void
 * @throws Exception If a required directory cannot be created.
 */
function run() {
    global $argv;
    global $DOMAIN;

    if (!is_array($argv) || count($argv) < 2) {
        print 'Please specify link for download' . PHP_EOL;
        return;
    }

    $DOWNLOAD_DIR = 'download';
    createDir($DOWNLOAD_DIR);

    // The manga name is the last path segment of the link, without extension.
    $link = $argv[1];
    $info = pathinfo($link);
    $manga_name = $info['filename'];
    $save_to = $DOWNLOAD_DIR . '/' . $manga_name;
    createDir($save_to);

    print 'Retrieving all chapter links...' . PHP_EOL;
    $qp = html5qp($link);
    $chap_nodes = $qp->find('.divContenuCentre .floatLeft.cacheOverflow a');

    // Must start as an array: appending with [] to a string is a fatal error
    // on PHP 7.1+, and an empty result must still be iterable below.
    $chap_links = [];
    foreach ($chap_nodes as $chap_node) {
        $chap_links[] = $DOMAIN . $chap_node->attr('href');
    }

    foreach ($chap_links as $index => $chap_link) {
        downloadChap($index, $chap_link, $save_to);
    }

    printPdf($manga_name);
}
/**
 * Download all page images of one chapter into <save_to>/<4-digit chapter index>/.
 *
 * Pages are stored as <4-digit page index>.<extension>. Pages that already
 * exist on disk are skipped, so an interrupted run can be resumed.
 *
 * @param int    $chap_index Zero-based chapter index, used as folder name.
 * @param string $chap_link  URL of the chapter; "?page=N" is appended per page.
 * @param string $save_to    Folder of the manga that receives the chapter folder.
 * @return void
 * @throws Exception If the chapter folder cannot be created.
 */
function downloadChap($chap_index,$chap_link,$save_to) {
    global $DOMAIN;
    // Same base URL as the chapter links; fall back to the literal when the
    // global is not set in the calling context.
    $base_url = !empty($DOMAIN) ? $DOMAIN : 'http://manga-scan.com';

    $dir = $save_to . '/' . sprintf('%04d', $chap_index);
    createDir($dir);

    print 'Retrieving number of pages of chapter '.($chap_index + 1).'...' . PHP_EOL;
    $qp = htmlqp($chap_link);
    $total_page = intval($qp->find('#divFormNbImages')->text());
    print "Ready to download $total_page pages" . PHP_EOL;

    for ($a = 0; $a < $total_page; $a++) {
        $saved_img = $dir . '/' . sprintf('%04d', $a);

        // Cheap check first: avoid fetching the page when the image is there.
        if (file_exists($saved_img . '.png') || file_exists($saved_img . '.jpg') || file_exists($saved_img . '.jpeg')) {
            continue;
        }

        $link = $chap_link . '?page=' . ($a + 1);
        print 'Accessing web page ' . $link . PHP_EOL;
        $qp = html5qp($link);
        $src = $qp->find('#divLectureContenu img')->attr('src');

        // No image on the page: nothing to download.
        if (empty($src)) {
            continue;
        }

        // The extension must be validated before it is used to build the path.
        $info = pathinfo($src);
        if (empty($info['extension'])) {
            continue;
        }

        $file_path = $saved_img . '.' . $info['extension'];
        if (file_exists($file_path)) {
            continue;
        }

        $src = $base_url . $src;
        print 'Downloading image...' . PHP_EOL;
        $img = file_get_contents(str_replace(' ','%20',$src));

        // Do not write a zero-byte file on failure: it would be mistaken for
        // a finished download and block the retry on the next run.
        if ($img === false) {
            print 'Failed to download ' . $src . PHP_EOL;
            continue;
        }

        file_put_contents($file_path,$img);
    }
}
// Start the download when the script is executed.
run();
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment