Created
January 3, 2016 10:53
-
-
Save SriramKeerthi/7c590629b55140d855c1 to your computer and use it in GitHub Desktop.
Counts words in a file in a single thread
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.util.*;
import java.util.regex.Pattern;
// NOTE(review): JDK-internal class; it is not exported to application code on Java 9+ and may
// break compilation there. Prefer java.lang.UnsupportedOperationException — confirm and drop.
import sun.reflect.generics.reflectiveObjects.NotImplementedException;
/**
 * Reads the file named by the first command-line argument and prints how many times each word
 * occurs in it, using a single thread.
 *
 * <p>Each line is split on the characters space, {@code _}, {@code .}, {@code ,}, {@code -} and
 * {@code +}. Only tokens made up entirely of ASCII letters are counted, and counting is
 * case-insensitive (tokens are lower-cased first).
 *
 * @author Sriram
 */
public class WordCountSingleThread
{
    /**
     * Iterates over the lines of a text file, reading one line ahead so that
     * {@link #hasNext()} never has to touch the underlying reader.
     */
    private static class FileIterator implements Iterator<String>, AutoCloseable
    {
        private final BufferedReader br;

        /** The line the next call to {@link #next()} will return, or {@code null} at end of file. */
        private String nextLine;


        /**
         * Opens the file and pre-reads its first line.
         *
         * @param fileName path of the file to read; decoded as UTF-8
         * @throws IOException if the file cannot be opened or the first line cannot be read
         */
        public FileIterator( String fileName ) throws IOException
        {
            // Explicit charset: FileReader would silently use the platform default encoding.
            br = new BufferedReader(
                    new InputStreamReader( new FileInputStream( fileName ), StandardCharsets.UTF_8 ) );
            try {
                nextLine = br.readLine();
            } catch ( IOException e ) {
                // The caller never receives this instance, so nobody else can close the reader.
                try {
                    br.close();
                } catch ( IOException closeFailure ) {
                    e.addSuppressed( closeFailure );
                }
                throw e;
            }
        }


        /** @return {@code true} while there is at least one more line to return */
        @Override public boolean hasNext()
        {
            return nextLine != null;
        }


        /**
         * Returns the current line and reads the following one ahead.
         *
         * @return the next line of the file, without its line terminator
         * @throws NoSuchElementException if the file has no more lines
         * @throws IllegalStateException if reading ahead fails; the original {@link IOException}
         *         is attached as the cause
         */
        @Override public String next()
        {
            if ( nextLine == null ) {
                throw new NoSuchElementException( "No more lines to read" );
            }
            String lineToReturn = nextLine;
            try {
                nextLine = br.readLine();
            } catch ( IOException e ) {
                // Surface the failure instead of pretending the file ended early, which would
                // silently produce truncated counts.
                nextLine = null;
                throw new IllegalStateException( "Failed to read the next line", e );
            }
            return lineToReturn;
        }


        /**
         * Not supported: lines cannot be removed from the underlying file.
         *
         * @throws UnsupportedOperationException always
         */
        @Override public void remove()
        {
            throw new UnsupportedOperationException( "remove" );
        }


        /**
         * Closes the underlying reader.
         *
         * @throws IOException if closing the reader fails
         */
        @Override public void close() throws IOException
        {
            br.close();
        }
    }


    /** The individual steps of the word-count pipeline: tokenize, filter, normalize, accumulate. */
    private static class Transformers
    {
        /** Characters that separate tokens within a line; compiled once instead of per line. */
        private static final Pattern TOKEN_DELIMITER = Pattern.compile( "[ _\\.,\\-\\+]" );

        /** A legal word consists of one or more ASCII letters; compiled once instead of per token. */
        private static final Pattern LEGAL_WORD = Pattern.compile( "[a-zA-Z]+" );


        /**
         * Splits a line into raw tokens.
         *
         * @param input the line to split
         * @return the tokens in order of appearance; may contain empty strings where two
         *         delimiters are adjacent
         */
        public String[] mapToTokens( String input )
        {
            return TOKEN_DELIMITER.split( input );
        }


        /**
         * Keeps only the tokens that consist entirely of ASCII letters.
         *
         * @param words the raw tokens
         * @return a new array holding the legal tokens in their original order
         */
        private String[] filterIllegalTokens( String[] words )
        {
            List<String> filteredList = new ArrayList<>( words.length );
            for ( String word : words ) {
                if ( LEGAL_WORD.matcher( word ).matches() ) {
                    filteredList.add( word );
                }
            }
            return filteredList.toArray( new String[0] );
        }


        /**
         * Lower-cases every word.
         *
         * @param words the words to normalize
         * @return a new array of the same length holding the lower-cased words
         */
        private String[] mapToLowerCase( String[] words )
        {
            String[] lowered = new String[words.length];
            for ( int i = 0; i < words.length; i++ ) {
                // Locale.ROOT keeps the result independent of the default locale
                // (e.g. a Turkish locale would otherwise map "I" to a dotless "ı").
                lowered[i] = words[i].toLowerCase( Locale.ROOT );
            }
            return lowered;
        }


        /**
         * Adds one occurrence of {@code word} to the running totals.
         *
         * @param counter map from word to the number of occurrences seen so far; updated in place
         * @param word the word to count
         */
        public void reduce( Map<String, Integer> counter, String word )
        {
            Integer seenSoFar = counter.get( word );
            counter.put( word, seenSoFar == null ? 1 : seenSoFar + 1 );
        }
    }


    /**
     * Counts the words of the given file and prints the totals to standard output as
     * {@code "Word Count:"} followed by the map of word to count on the next line.
     *
     * @param args command-line arguments; {@code args[0]} is the path of the file to read
     * @throws IllegalArgumentException if no file name is supplied
     * @throws Exception if the file cannot be opened, read or closed
     */
    public static void main( String[] args ) throws Exception
    {
        if ( args == null || args.length < 1 ) {
            throw new IllegalArgumentException( "Usage: java WordCountSingleThread <file>" );
        }

        Map<String, Integer> counters = new HashMap<>();
        Transformers tr = new Transformers();
        try ( FileIterator fc = new FileIterator( args[0] ) ) {
            while ( fc.hasNext() ) {
                String[] words = tr.mapToTokens( fc.next() );
                String[] legalWords = tr.filterIllegalTokens( words );
                String[] lowerCaseWords = tr.mapToLowerCase( legalWords );
                for ( String word : lowerCaseWords ) {
                    tr.reduce( counters, word );
                }
            }
        }
        System.out.println( "Word Count:\n" + counters );
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment