Created
April 16, 2014 05:36
-
-
Save tedzhou/10811840 to your computer and use it in GitHub Desktop.
抓数据的工具集
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
ioPool |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?xml version="1.0" encoding="UTF-8"?> | |
<project version="4"> | |
<component name="CompilerConfiguration"> | |
<option name="DEFAULT_COMPILER" value="Javac" /> | |
<resourceExtensions /> | |
<wildcardResourcePatterns> | |
<entry name="!?*.java" /> | |
<entry name="!?*.form" /> | |
<entry name="!?*.class" /> | |
<entry name="!?*.groovy" /> | |
<entry name="!?*.scala" /> | |
<entry name="!?*.flex" /> | |
<entry name="!?*.kt" /> | |
<entry name="!?*.clj" /> | |
</wildcardResourcePatterns> | |
<annotationProcessing> | |
<profile default="true" name="Default" enabled="false"> | |
<processorPath useClasspath="true" /> | |
</profile> | |
</annotationProcessing> | |
</component> | |
</project> | |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<component name="ProjectDictionaryState"> | |
<dictionary name="ted" /> | |
</component> |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?xml version="1.0" encoding="UTF-8"?> | |
<project version="4"> | |
<component name="Encoding" useUTFGuessing="true" native2AsciiForPropertiesFiles="false" /> | |
</project> | |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?xml version="1.0" encoding="UTF-8"?> | |
<project version="4"> | |
<component name="JavaScriptLibraryMappings"> | |
<file url="file://$PROJECT_DIR$/ioHelper.js" libraries="{Node.js Globals}" /> | |
<file url="file://$PROJECT_DIR$/ioPool.js" libraries="{Node.js Globals}" /> | |
</component> | |
</project> | |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?xml version="1.0" encoding="UTF-8"?> | |
<project version="4"> | |
<component name="ProjectInspectionProfilesVisibleTreeState"> | |
<entry key="Project Default"> | |
<profile-state> | |
<expanded-state> | |
<State> | |
<id /> | |
</State> | |
<State> | |
<id>CSS</id> | |
</State> | |
<State> | |
<id>General</id> | |
</State> | |
<State> | |
<id>GeneralJavaScript</id> | |
</State> | |
<State> | |
<id>JavaScript</id> | |
</State> | |
<State> | |
<id>Probable bugs</id> | |
</State> | |
</expanded-state> | |
<selected-state> | |
<State> | |
<id>Class structure</id> | |
</State> | |
</selected-state> | |
</profile-state> | |
</entry> | |
</component> | |
<component name="ProjectRootManager" version="2" languageLevel="JDK_1_6" assert-keyword="true" jdk-15="true"> | |
<output url="file://$PROJECT_DIR$/out" /> | |
</component> | |
<component name="SvnConfiguration" maxAnnotateRevisions="500" myUseAcceleration="nothing" myAutoUpdateAfterCommit="false" cleanupOnStartRun="false" SSL_PROTOCOLS="sslv3"> | |
<option name="USER" value="" /> | |
<option name="PASSWORD" value="" /> | |
<option name="mySSHConnectionTimeout" value="30000" /> | |
<option name="mySSHReadTimeout" value="30000" /> | |
<option name="LAST_MERGED_REVISION" /> | |
<option name="MERGE_DRY_RUN" value="false" /> | |
<option name="MERGE_DIFF_USE_ANCESTRY" value="true" /> | |
<option name="UPDATE_LOCK_ON_DEMAND" value="false" /> | |
<option name="IGNORE_SPACES_IN_MERGE" value="false" /> | |
<option name="CHECK_NESTED_FOR_QUICK_MERGE" value="false" /> | |
<option name="IGNORE_SPACES_IN_ANNOTATE" value="true" /> | |
<option name="SHOW_MERGE_SOURCES_IN_ANNOTATE" value="true" /> | |
<option name="FORCE_UPDATE" value="false" /> | |
<option name="IGNORE_EXTERNALS" value="false" /> | |
<configuration useDefault="true">$USER_HOME$/.subversion</configuration> | |
<myIsUseDefaultProxy>false</myIsUseDefaultProxy> | |
</component> | |
</project> | |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?xml version="1.0" encoding="UTF-8"?> | |
<project version="4"> | |
<component name="ProjectModuleManager"> | |
<modules> | |
<module fileurl="file://$PROJECT_DIR$/ioPool.iml" filepath="$PROJECT_DIR$/ioPool.iml" /> | |
</modules> | |
</component> | |
</project> | |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<component name="DependencyValidationManager"> | |
<state> | |
<option name="SKIP_IMPORT_STATEMENTS" value="false" /> | |
</state> | |
</component> |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?xml version="1.0" encoding="UTF-8"?> | |
<project version="4"> | |
<component name="VcsDirectoryMappings"> | |
<mapping directory="" vcs="" /> | |
</component> | |
</project> | |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
var http = require('http'); | |
var fs = require('fs'); | |
var url = require('url'); | |
var request = require('request'); | |
var iconv = require('iconv-lite'); | |
var mkdirp = require('mkdirp'); | |
var path = require('path'); | |
/**
 * Fetch the body of a URL and decode it to a string.
 *
 * The body is decoded as GBK when the Content-Type header mentions GBK or
 * GB2312, otherwise as UTF-8. The callback is invoked at most once.
 *
 * @param {string|Object} urlStr URL string, or an object already produced by url.parse
 * @param {function(Error, string)} callback called as callback(err, html)
 */
function getHtml(urlStr, callback) {
    if (!callback) return;

    var finished = false;
    // Guarantees the caller hears back exactly once, whichever event fires first.
    function finish(err, html) {
        if (finished) return;
        finished = true;
        callback(err, html);
    }

    if (!urlStr) {
        // Report the problem instead of returning silently; callers that count
        // outstanding requests would otherwise wait forever. Deferred so the
        // callback is always asynchronous.
        process.nextTick(function () {
            finish(new Error('getHtml: url is required'), null);
        });
        return;
    }

    if (typeof urlStr === 'string') {
        urlStr = url.parse(urlStr);
    }

    // http.get throws on https: URLs, so choose the client from the protocol.
    var client = urlStr.protocol === 'https:' ? require('https') : http;

    client.get(urlStr, function (res) {
        var chunks = [];
        res.on('data', function (chunk) {
            // Keep raw Buffers so multi-byte characters split across chunks survive.
            chunks.push(chunk);
        }).on('end', function () {
            var html = '';
            var decodeError = null;
            try {
                var contentType = res.headers['content-type'];
                var charset = /gbk|gb2312/i.test(contentType) ? 'GBK' : 'utf8';
                html = iconv.decode(Buffer.concat(chunks), charset);
            } catch (ex) {
                decodeError = ex;
            }
            // Called outside the try block: an exception thrown by the caller's
            // callback is not a decode error and must not trigger a second call.
            finish(decodeError, html);
        }).on('error', function (err) {
            finish(err, null);
        });
    }).on('error', function (err) {
        console.log('http get error:', err);
        finish(err, null);
    });
}
/**
 * Write data to a file, creating the parent directory first if it is missing.
 *
 * Non-Buffer objects are serialised with JSON.stringify; strings and Buffers
 * are written as they are.
 *
 * @param {string} pathName destination file path
 * @param {string|Buffer|Object} str content to write
 * @param {function(Error)} [callback] called once with the write error, if any
 */
function simpleWrite(pathName, str, callback) {
    if (typeof str === 'object' && !Buffer.isBuffer(str)) {
        str = JSON.stringify(str);
    }

    var docPath = path.dirname(pathName);
    var target = path.join(docPath, path.basename(pathName));

    if (!fs.existsSync(docPath)) {
        // rwxr-xr-x; parsed at runtime because legacy octal literals (0755)
        // are a SyntaxError in strict mode.
        mkdirp.sync(docPath, parseInt('755', 8));
    }

    fs.writeFile(target, str, function (err) {
        if (err) {
            console.log(err);
        } else {
            console.log("write success" + pathName);
        }
        // Report failures as well as successes so callers never wait forever.
        if (callback) callback(err);
    });
}
/**
 * Download a resource (for example an image) to a local file.
 *
 * A HEAD request is issued first and its content-type / content-length headers
 * are logged; the body is then streamed into `filename`.
 *
 * @param {string} uri resource to download
 * @param {string} filename destination file path
 * @param {function(Error)} [callback] called once, with an error on failure
 *        or null after the file has been closed
 */
function download(uri, filename, callback) {
    var finished = false;
    // Several stream events can signal completion; report only the first.
    function finish(err) {
        if (finished) return;
        finished = true;
        if (callback) callback(err);
    }

    request.head(uri, function (err, res, body) {
        if (err) {
            // Surface the failure instead of returning silently.
            console.log('download head error:', err);
            return finish(err);
        }
        console.log('content-type:', res.headers['content-type']);
        console.log('content-length:', res.headers['content-length']);

        var output = fs.createWriteStream(filename);
        output.on('error', finish).on('close', function () {
            finish(null);
        });

        request(uri).on('error', function (streamErr) {
            // pipe() does not close the destination when the source errors.
            output.end();
            finish(streamErr);
        }).pipe(output);
    });
}
/**
 * Recursively collect every non-directory entry under `root`.
 *
 * Entries are examined with lstat, so a symbolic link is listed as a file and
 * never followed. Paths are built with path.join, which normalises them (a
 * trailing separator on `root` does not produce a doubled separator).
 *
 * @param {string} root directory to walk
 * @returns {Array} paths of all files found, depth-first in readdir order
 */
function getAllFiles(root) {
    var found = [];

    // One shared accumulator avoids re-copying the result at each level.
    (function walk(dir) {
        fs.readdirSync(dir).forEach(function (entry) {
            var pathName = path.join(dir, entry);
            if (fs.lstatSync(pathName).isDirectory()) {
                walk(pathName);
            } else {
                found.push(pathName);
            }
        });
    })(root);

    return found;
}
/**
 * Rename files in bulk.
 *
 * Every file found recursively under `sourceDoc` is copied (or moved) to
 * `destinationDoc` under the name returned by `renameOperation`. When
 * `destinationDoc` is omitted, each file stays in its own directory.
 * Note that with a destination given, files from nested directories all land
 * directly in that one directory.
 *
 * @param {string} sourceDoc source directory
 * @param {string} [destinationDoc] target directory
 * @param {function(string): string} [renameOperation] maps the old base name to
 *        the new one; a falsy return value skips the file
 * @param {boolean} [isMove] move the files instead of copying them
 *
 * @example
 * // Rename demo
 * ioHelper.filesRename('/Users/ted/Downloads/info', '/Users/ted/Downloads/renameInfo', function (name) {
 *     var arr = name.split('.');
 *     if (!arr[0]) { return; }
 *     arr[1] = 'txt';
 *     arr[0] = 'schoolInfo-' + arr[0];
 *     return arr.join('.');
 * });
 */
function filesRename(sourceDoc, destinationDoc, renameOperation, isMove) {
    // rwxrwxr-x; parsed at runtime because legacy octal literals (0775) are a
    // SyntaxError in strict mode.
    var dirMode = parseInt('775', 8);

    getAllFiles(sourceDoc).forEach(function (pathName) {
        var name = path.basename(pathName);
        if (renameOperation) {
            name = renameOperation(name);
        }
        if (!name) return;

        // Resolved per file: reassigning destinationDoc here would make the
        // first file's directory the destination of every later file.
        var targetDir = destinationDoc || path.dirname(pathName);
        var finalPath = path.join(targetDir, name);

        // Copying a file onto itself would truncate it; nothing to do anyway.
        if (path.resolve(finalPath) === path.resolve(pathName)) return;

        if (!fs.existsSync(targetDir)) {
            mkdirp.sync(targetDir, dirMode);
        }

        if (isMove) {
            fs.renameSync(pathName, finalPath);
        } else {
            fileCopy(pathName, finalPath);
        }
    });
}
/**
 * Copy a file by streaming it, creating the destination's parent directory
 * first if it is missing.
 *
 * @param {string} oldPath file to read
 * @param {string} newPath file to write
 * @param {function(Error)} [callback] called once: with the error on failure,
 *        or null after the destination has been closed
 */
function fileCopy(oldPath, newPath, callback) {
    var targetDir = path.dirname(newPath);
    if (!fs.existsSync(targetDir)) {
        // Create the PARENT directory; creating newPath itself would put a
        // directory where the file has to go. Mode rwxrwxr-x, parsed at runtime
        // because legacy octal literals are a SyntaxError in strict mode.
        mkdirp.sync(targetDir, parseInt('775', 8));
    }

    var finished = false;
    // Several stream events can signal completion; report only the first.
    function finish(err) {
        if (finished) return;
        finished = true;
        if (err) console.log('file copy error:', err);
        if (callback) callback(err || null);
    }

    var reader = fs.createReadStream(oldPath);
    var writer = fs.createWriteStream(newPath);

    reader.on('error', function (err) {
        // pipe() does not close the destination when the source errors.
        writer.end();
        finish(err);
    });
    writer.on('error', finish).on('close', function () {
        finish(null);
    });

    reader.pipe(writer);
}
// Public API of the I/O helper module, assembled one entry at a time.
var ioHelperApi = {};
ioHelperApi.simpleWrite = simpleWrite;
ioHelperApi.getHtml = getHtml;
ioHelperApi.download = download;
ioHelperApi.getAllFiles = getAllFiles;
ioHelperApi.filesRename = filesRename;
ioHelperApi.fileCopy = fileCopy;

// Keep `exports` and `module.exports` pointing at the same object.
exports = ioHelperApi;
module.exports = ioHelperApi;
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?xml version="1.0" encoding="UTF-8"?> | |
<module type="WEB_MODULE" version="4"> | |
<component name="NewModuleRootManager" inherit-compiler-output="true"> | |
<exclude-output /> | |
<content url="file://$MODULE_DIR$" /> | |
<orderEntry type="sourceFolder" forTests="false" /> | |
</component> | |
</module> | |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
var ioHelper = require('./ioHelper'); | |
/**
 * Pool that fetches queued URLs through ioHelper.getHtml while keeping at most
 * `maxRequestInSameTime` requests in flight.
 *
 * Usage: push() the requests, assign `onDone` if wanted, then call start().
 */
function IoPool() {
    this.totalRequest = 0;          // requests pushed since creation / last cancel
    this.aliveRequest = 0;          // requests currently in flight
    this.waitingQueue = [];         // requests not yet started
    this.maxRequestInSameTime = 40; // concurrency limit
    this.onDone = null;             // called once the queue is drained and idle
    this.canceled = false;          // true between cancel() and the next start()
    this.doned = false;             // whether onDone already fired for this batch
    this.generation = 0;            // bumped by cancel() to invalidate in-flight work
}
IoPool.prototype = {
    constructor: IoPool,

    /**
     * Drop all queued requests and reset the counters. Requests already in
     * flight cannot be aborted; their results are discarded when they arrive
     * (their callbacks are not invoked and the counters are left alone).
     */
    cancel: function () {
        this.canceled = true;
        this.generation++;
        this.totalRequest = 0;
        this.aliveRequest = 0;
        this.waitingQueue = [];
    },

    /** Begin (or resume) processing the waiting queue. */
    start: function () {
        this.canceled = false;
        this.checkQueue();
    },

    /**
     * Queue a request.
     * @param {string} url URL to fetch
     * @param {function(Error, string)} [fn] receives (err, html) for this URL
     */
    push: function (url, fn) {
        this.totalRequest++;
        // New work re-arms onDone so it fires again when this batch finishes.
        this.doned = false;
        this.waitingQueue.push({url: url, callback: fn});
    },

    /**
     * Start as many waiting requests as the concurrency limit allows; once the
     * queue is empty and nothing is in flight, fire onDone a single time.
     */
    checkQueue: function () {
        var self = this;
        if (self.waitingQueue.length > 0) {
            var left = Math.min(self.maxRequestInSameTime - self.aliveRequest, self.waitingQueue.length);
            // `> 0` matters: a negative budget (limit lowered while requests
            // are in flight) would otherwise never count down to a falsy value.
            while (left-- > 0) {
                self.runRequest(self.waitingQueue.shift());
            }
        } else {
            self.log('done');
            if (self.onDone && self.aliveRequest === 0 && !self.doned) {
                self.doned = true;
                self.onDone.call(self);
            }
        }
    },

    /**
     * Launch one request and wire up its completion handling.
     * @param {{url: string, callback: function}} request entry taken from the queue
     */
    runRequest: function (request) {
        var self = this;
        var generation = self.generation;
        var settled = false;

        function onComplete() {
            // Ignore duplicate completions and results of canceled requests, so
            // the in-flight counter can never be decremented twice or go negative.
            if (settled || generation !== self.generation) return;
            settled = true;
            self.aliveRequest--;
            try {
                if (request.callback) {
                    request.callback.apply(this, arguments);
                }
            } finally {
                // Keep the queue moving even if the user callback throws.
                self.checkQueue();
            }
        }

        self.aliveRequest++;
        self.log();

        if (!request.url) {
            // Fail the request here rather than hand an empty URL to the
            // fetcher, which may never call back and would stall the pool.
            // Deferred so completion never re-enters checkQueue synchronously.
            process.nextTick(function () {
                onComplete(new Error('IoPool: request has no url'), null);
            });
            return;
        }
        ioHelper.getHtml(request.url, onComplete);
    },

    /**
     * Print a progress line.
     * @param {string} [tag] label for the line, "default" when omitted
     */
    log: function (tag) {
        var self = this;
        // Share of requests not yet finished, truncated to an integer
        // (NaN from 0/0 becomes 0 through the shift).
        var percentage = ((self.waitingQueue.length + self.aliveRequest) / self.totalRequest * 100) >> 0;
        console.log("tag:" + (tag || "default"), "total:" + self.totalRequest, "waitingQueue:" + self.waitingQueue.length, "requesting:" + self.aliveRequest, "left:" + percentage + "%");
    }
};
// The module's sole export is the IoPool constructor.
exports = IoPool;
module.exports = IoPool;
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment