Last active
December 17, 2015 00:11
-
-
Save sh2/5519338 to your computer and use it in GitHub Desktop.
Wikipedia Route Search program
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package wikipedia.searcher; | |
import java.sql.Connection; | |
import java.sql.DriverManager; | |
import java.sql.PreparedStatement; | |
import java.sql.ResultSet; | |
import java.sql.SQLException; | |
import java.sql.Statement; | |
import java.util.ArrayDeque; | |
import java.util.ArrayList; | |
import java.util.Deque; | |
import java.util.HashMap; | |
import java.util.HashSet; | |
import java.util.List; | |
import java.util.Map; | |
import java.util.Random; | |
import java.util.Set; | |
import wikipedia.WikipediaException; | |
public class RouteSearcher { | |
public static final int MAX_DISTANCE = 6; | |
public static final String JDBC_USER = "wikipedia"; | |
public static final String JDBC_PASS = ""; | |
private static final String SELECT_TITLE_ALL_ID = "SELECT id FROM title WHERE redirect = 0"; | |
private static final String SELECT_TITLE_TITLE = "SELECT title FROM title WHERE id = ?"; | |
private TransparentLinkMap forwardLinkMap; | |
private TransparentLinkMap backwardLinkMap; | |
public static void main(String[] args) { | |
String url = null; | |
Connection connection = null; | |
RouteSearcher searcher = null; | |
PreparedStatement selectTitleTitle = null; | |
if (args.length != 1 && args.length != 3) { | |
System.err | |
.println("java wikipedia.searcher.RouteSearcher server [source] [destination]"); | |
System.exit(1); | |
} | |
url = "jdbc:mysql://" + args[0] + "/wikipedia?useServerPrepStmts=true"; | |
try { | |
connection = DriverManager.getConnection(url, JDBC_USER, JDBC_PASS); | |
searcher = new RouteSearcher(connection); | |
selectTitleTitle = connection.prepareStatement(SELECT_TITLE_TITLE); | |
if (args.length == 3) { | |
int source = Integer.parseInt(args[1]); | |
int destination = Integer.parseInt(args[2]); | |
List<Integer> route = searcher.search(source, destination); | |
System.out.println("SOURCE : [" + source + "]" | |
+ searcher.getTitle(selectTitleTitle, source) + ", DESTINATION : [" | |
+ destination + "]" + searcher.getTitle(selectTitleTitle, destination)); | |
searcher.printRoute(selectTitleTitle, route); | |
} else { | |
searcher.randomTest(connection, selectTitleTitle); | |
} | |
} catch (SQLException e) { | |
e.printStackTrace(); | |
} catch (WikipediaException e) { | |
e.printStackTrace(); | |
} finally { | |
if (selectTitleTitle != null) { | |
try { | |
selectTitleTitle.close(); | |
} catch (SQLException e) { | |
// 何もしない | |
} | |
} | |
if (searcher != null) { | |
searcher.close(); | |
} | |
if (connection != null) { | |
try { | |
connection.rollback(); | |
} catch (SQLException e) { | |
// 何もしない | |
} | |
try { | |
connection.close(); | |
} catch (SQLException e) { | |
// 何もしない | |
} | |
} | |
} | |
} | |
public RouteSearcher(Connection connection) throws WikipediaException { | |
this.forwardLinkMap = new TransparentLinkMap(connection, true); | |
this.backwardLinkMap = new TransparentLinkMap(connection, false); | |
} | |
public List<Integer> search(int source, int destination) throws WikipediaException { | |
forwardLinkMap.resetCounter(); | |
backwardLinkMap.resetCounter(); | |
int distance = 0; | |
int intersection = 0; | |
boolean isFound = false; | |
Deque<Integer> deque = new ArrayDeque<Integer>(); | |
Map<Integer, Integer> forwardDestinationMap = new HashMap<Integer, Integer>(); | |
forwardDestinationMap.put(source, 0); | |
Set<Integer> forwardNextSet = new HashSet<Integer>(); | |
forwardNextSet.add(source); | |
Map<Integer, Integer> backwardDestinationMap = new HashMap<Integer, Integer>(); | |
backwardDestinationMap.put(destination, 0); | |
Set<Integer> backwardNextSet = new HashSet<Integer>(); | |
backwardNextSet.add(destination); | |
if (backwardNextSet.contains(source)) { | |
// 距離0 | |
intersection = source; | |
isFound = true; | |
} else { | |
SEARCH: for (distance = 1; distance <= MAX_DISTANCE; distance++) { | |
Set<Integer> workingSet = null; | |
// 頂点数の少ない方から探索する | |
if (forwardNextSet.size() < backwardNextSet.size()) { | |
// 前方探索 | |
workingSet = forwardNextSet; | |
forwardNextSet = new HashSet<Integer>(); | |
for (int id1 : workingSet) { | |
for (int id2 : forwardLinkMap.get(id1)) { | |
if (!forwardDestinationMap.containsKey(id2)) { | |
forwardDestinationMap.put(id2, id1); | |
forwardNextSet.add(id2); | |
} | |
if (backwardNextSet.contains(id2)) { | |
// backward側の先端に触れたら探索終了 | |
intersection = id2; | |
isFound = true; | |
break SEARCH; | |
} | |
} | |
} | |
if (forwardNextSet.isEmpty()) { | |
// 経路が塞がっていたら探索終了 | |
break SEARCH; | |
} | |
} else { | |
// 後方探索 | |
workingSet = backwardNextSet; | |
backwardNextSet = new HashSet<Integer>(); | |
for (int id2 : workingSet) { | |
for (int id1 : backwardLinkMap.get(id2)) { | |
if (!backwardDestinationMap.containsKey(id1)) { | |
backwardDestinationMap.put(id1, id2); | |
backwardNextSet.add(id1); | |
} | |
if (forwardNextSet.contains(id1)) { | |
// forward側の先端に触れたら探索終了 | |
intersection = id1; | |
isFound = true; | |
break SEARCH; | |
} | |
} | |
} | |
if (backwardNextSet.isEmpty()) { | |
// 経路が塞がっていたら探索終了 | |
break SEARCH; | |
} | |
} | |
} | |
} | |
if (isFound) { | |
deque.offerFirst(intersection); | |
// 前方の経路取得 | |
int parent = forwardDestinationMap.get(intersection); | |
while (parent != 0) { | |
deque.offerFirst(parent); | |
parent = forwardDestinationMap.get(parent); | |
} | |
// 後方の経路取得 | |
parent = backwardDestinationMap.get(intersection); | |
while (parent != 0) { | |
deque.offerLast(parent); | |
parent = backwardDestinationMap.get(parent); | |
} | |
} | |
return new ArrayList<Integer>(deque); | |
} | |
public void close() { | |
forwardLinkMap.close(); | |
backwardLinkMap.close(); | |
} | |
private void randomTest(Connection connection, PreparedStatement selectTitle) | |
throws WikipediaException { | |
List<Integer> idList = new ArrayList<Integer>(); | |
Random random = new Random(); | |
Statement selectTitleAllId = null; | |
ResultSet resultSet = null; | |
try { | |
selectTitleAllId = connection.createStatement(); | |
resultSet = selectTitleAllId.executeQuery(SELECT_TITLE_ALL_ID); | |
while (resultSet.next()) { | |
idList.add(resultSet.getInt(1)); | |
} | |
resultSet.close(); | |
selectTitleAllId.close(); | |
while (true) { | |
int source = idList.get(random.nextInt(idList.size())); | |
int destination = idList.get(random.nextInt(idList.size())); | |
List<Integer> route = search(source, destination); | |
System.out.println("SOURCE : [" + source + "]" + getTitle(selectTitle, source) | |
+ ", DESTINATION : [" + destination + "]" | |
+ getTitle(selectTitle, destination)); | |
printRoute(selectTitle, route); | |
System.out.println(); | |
} | |
} catch (SQLException e) { | |
throw new WikipediaException(e); | |
} finally { | |
if (resultSet != null) { | |
try { | |
resultSet.close(); | |
} catch (SQLException e) { | |
// 何もしない | |
} | |
} | |
if (selectTitleAllId != null) { | |
try { | |
selectTitleAllId.close(); | |
} catch (SQLException e) { | |
// 何もしない | |
} | |
} | |
} | |
} | |
private void printRoute(PreparedStatement selectTitle, List<Integer> route) | |
throws WikipediaException { | |
if (route.size() == 0) { | |
System.out.println("NOT FOUND"); | |
} else { | |
boolean isFirst = true; | |
System.out.print("FOUND, DISTANCE : " + (route.size() - 1) + ", "); | |
for (int id : route) { | |
if (!isFirst) { | |
System.out.print(" => "); | |
} | |
isFirst = false; | |
System.out.print("[" + id + "]" + getTitle(selectTitle, id)); | |
} | |
System.out.println(); | |
} | |
System.out.println("FORWARD, " + forwardLinkMap.getStatistics()); | |
System.out.println("BACKWARD, " + backwardLinkMap.getStatistics()); | |
} | |
private String getTitle(PreparedStatement selectTitle, int id) throws WikipediaException { | |
String title = null; | |
ResultSet resultSet = null; | |
try { | |
selectTitle.clearParameters(); | |
selectTitle.setInt(1, id); | |
resultSet = selectTitle.executeQuery(); | |
if (resultSet.next()) { | |
title = resultSet.getString(1); | |
} | |
} catch (SQLException e) { | |
throw new WikipediaException(e); | |
} finally { | |
if (resultSet != null) { | |
try { | |
resultSet.close(); | |
} catch (SQLException e) { | |
// 何もしない | |
} | |
} | |
} | |
return title; | |
} | |
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package wikipedia.searcher; | |
import java.sql.Connection; | |
import java.sql.PreparedStatement; | |
import java.sql.ResultSet; | |
import java.sql.SQLException; | |
import java.util.ArrayList; | |
import java.util.LinkedHashMap; | |
import java.util.List; | |
import java.util.Map; | |
import wikipedia.WikipediaException; | |
/** | |
* linkテーブルをHashMapのように扱うためのクラスです。 | |
* linkテーブルの全データをMapに格納せず必要に応じてデータベースから | |
* 取得するとともに、高速化のため一定数のデータをキャッシュします。 | |
* | |
* @author Sadao Hiratsuka | |
*/ | |
public class TransparentLinkMap { | |
private static final String SELECT_LINK_FORWARD_ID = "SELECT id2 FROM link WHERE id1 = ?"; | |
private static final String SELECT_LINK_BACKWARD_ID = "SELECT id1 FROM link WHERE id2 = ?"; | |
private Map<Integer, List<Integer>> cache; | |
private PreparedStatement selectLinkId; | |
private long getCount; | |
private long sqlCount; | |
public TransparentLinkMap(Connection connection, boolean isForward) throws WikipediaException { | |
this.cache = new LinkCache(); | |
try { | |
if (isForward) { | |
this.selectLinkId = connection.prepareStatement(SELECT_LINK_FORWARD_ID); | |
} else { | |
this.selectLinkId = connection.prepareStatement(SELECT_LINK_BACKWARD_ID); | |
} | |
} catch (SQLException e) { | |
if (selectLinkId != null) { | |
try { | |
selectLinkId.close(); | |
} catch (SQLException e2) { | |
// 何もしない | |
} | |
} | |
throw new WikipediaException(e); | |
} | |
} | |
public List<Integer> get(Integer key) throws WikipediaException { | |
getCount++; | |
if (cache.containsKey(key)) { | |
return cache.get(key); | |
} else { | |
sqlCount++; | |
ArrayList<Integer> value = new ArrayList<Integer>(); | |
ResultSet resultSet = null; | |
try { | |
selectLinkId.clearParameters(); | |
selectLinkId.setInt(1, key); | |
resultSet = selectLinkId.executeQuery(); | |
while (resultSet.next()) { | |
value.add(resultSet.getInt(1)); | |
} | |
value.trimToSize(); | |
cache.put(key, value); | |
} catch (SQLException e) { | |
if (selectLinkId != null) { | |
try { | |
selectLinkId.close(); | |
} catch (SQLException e2) { | |
// 何もしない | |
} | |
} | |
throw new WikipediaException(e); | |
} finally { | |
if (resultSet != null) { | |
try { | |
resultSet.close(); | |
} catch (SQLException e) { | |
// 何もしない | |
} | |
} | |
} | |
return value; | |
} | |
} | |
public void resetCounter() { | |
this.getCount = 0; | |
this.sqlCount = 0; | |
} | |
public String getStatistics() { | |
int hitRate = 0; | |
if (getCount != 0) { | |
hitRate = (int) (100L * (getCount - sqlCount) / getCount); | |
} | |
return "GET_COUNT : " + getCount + ", SQL_COUNT : " + sqlCount + ", HIT_RATE : " + hitRate | |
+ " CACHE_SIZE : " + cache.size(); | |
} | |
public void close() { | |
if (selectLinkId != null) { | |
try { | |
selectLinkId.close(); | |
} catch (SQLException e) { | |
// 何もしない | |
} | |
} | |
} | |
private class LinkCache extends LinkedHashMap<Integer, List<Integer>> { | |
public static final int CACHE_SIZE = 10000; | |
private static final long serialVersionUID = 1L; | |
public LinkCache() { | |
super(CACHE_SIZE, 1.0f, true); | |
} | |
@Override | |
protected boolean removeEldestEntry(java.util.Map.Entry<Integer, List<Integer>> eldest) { | |
if (size() > CACHE_SIZE) { | |
return true; | |
} else { | |
return false; | |
} | |
} | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment