Revisions
-
Alex Kesling revised this gist
Apr 6, 2013 . 1 changed file with 1 addition and 1 deletion.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -17,7 +17,7 @@ import ( func main() { if len(os.Args) < 4 { fmt.Printf("Too few arguments. Usage: %s inputFile stopwordFile outputFile \n", os.Args[0]) os.Exit(1) } inFile, err := os.Open(os.Args[1]) -
Alex Kesling revised this gist
Apr 6, 2013 . 1 changed file with 1 addition and 1 deletion.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -17,7 +17,7 @@ import ( func main() { if len(os.Args) < 4 { fmt.Printf("Too few arguments. Usage: %s inputFile stopwordFile outputFile \n", os.Args[0]) os.Exit(2) } inFile, err := os.Open(os.Args[1]) -
Alex Kesling revised this gist
Apr 6, 2013 . 1 changed file with 1 addition and 1 deletion.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -16,7 +16,7 @@ import ( func main() { if len(os.Args) < 4 { fmt.Printf("Too few arguments. Usage: %s inputFile stopwordFile outputFile \n", os.Args[0]) return } -
Alex Kesling revised this gist
Apr 6, 2013 . 1 changed file with 1 addition and 2 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -61,7 +61,7 @@ func main() { for { line, r_err := reader.ReadString('\n') if r_err != nil && r_err != io.EOF { panic(r_err) } nonalphanumeric, err := regexp.Compile(`\W`) @@ -83,7 +83,6 @@ func main() { if len(filtered) > 0 { csv := strings.Join(filtered, ",") if _, err := writer.WriteString(csv + "\n"); err != nil { panic(err) } -
Alex Kesling revised this gist
Apr 6, 2013 . 1 changed file with 1 addition and 0 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -43,6 +43,7 @@ func main() { word := strings.TrimSpace(line) stopwords[word] = true if err == io.EOF { break } -
Alex Kesling revised this gist
Apr 6, 2013 . 1 changed file with 1 addition and 0 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -40,6 +40,7 @@ func main() { if err != nil && err != io.EOF { panic(err) } word := strings.TrimSpace(line) stopwords[word] = true if err == io.EOF { -
Alex Kesling revised this gist
Apr 6, 2013 . 1 changed file with 1 addition and 3 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -25,16 +25,15 @@ func main() { panic(err) } defer inFile.Close() reader := bufio.NewReader(infile) stopwordFile, err := os.Open(os.Args[2]) if err != nil { panic(err) } defer stopWordFile.Close() stopwordReader := bufio.NewReader(stopwordfile) stopwords := make(map[string]bool) for { line, err := stopwordReader.ReadString('\n') @@ -53,7 +52,6 @@ func main() { panic(err) } defer outFile.Close() writer := bufio.NewWriter(outfile) // remove nonalphanumeric characters, lowercase, -
Alex Kesling revised this gist
Apr 6, 2013 . 1 changed file with 8 additions and 8 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -20,24 +20,24 @@ func main() { return } inFile, err := os.Open(os.Args[1]) if err != nil { panic(err) } defer inFile.Close() reader := bufio.NewReader(infile) stopwordFile, err := os.Open(os.Args[2]) if err != nil { panic(err) } defer stopWordFile.Close() stopwordReader := bufio.NewReader(stopwordfile) stopwords := make(map[string]bool) for { line, err := stopwordReader.ReadString('\n') if err != nil && err != io.EOF { panic(err) } @@ -48,11 +48,11 @@ func main() { } } outFile, err := os.Create(os.Args[3]) if err != nil { panic(err) } defer outFile.Close() writer := bufio.NewWriter(outfile) -
Alex Kesling revised this gist
Apr 6, 2013 . 1 changed file with 6 additions and 17 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -20,25 +20,20 @@ func main() { return } infile, err := os.Open(os.Args[1]) if err != nil { panic(err) } defer infile.Close() reader := bufio.NewReader(infile) stopwordfile, err := os.Open(os.Args[2]) if err != nil { panic(err) } defer stopwordfile.Close() stopwordreader := bufio.NewReader(stopwordfile) stopwords := make(map[string]bool) for { @@ -53,18 +48,12 @@ func main() { } } outfile, err := os.Create(os.Args[3]) if err != nil { panic(err) } defer outfile.Close() writer := bufio.NewWriter(outfile) // remove nonalphanumeric characters, lowercase, -
cdfox revised this gist
Apr 6, 2013 . 1 changed file with 2 additions and 1 deletion.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -1,6 +1,7 @@ // For each line of the input file, remove nonalphanumeric characters, // lowercase all letters, remove stopwords, and write the result to the output // file. I used the answer here as a template for reading/writing files: // http://stackoverflow.com/questions/1821811/how-to-read-write-from-to-file/9739903#9739903 package main -
cdfox revised this gist
Apr 6, 2013 . 1 changed file with 1 addition and 1 deletion.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -5,7 +5,7 @@ package main import ( "bufio" "fmt" "io" "os" -
cdfox renamed this gist
Apr 6, 2013 . 1 changed file with 0 additions and 0 deletions.There are no files selected for viewing
File renamed without changes. -
cdfox created this gist
Apr 6, 2013 .There are no files selected for viewing
// For each line of the input file, remove nonalphanumeric characters,
// lowercase all letters, remove stopwords, and write the result to the output
// file.
package main

import (
	"bufio"
	"fmt"
	"io"
	"os"
	"regexp"
	"strings"
)

// nonWord matches every character outside [0-9A-Za-z_]; note that the
// underscore therefore survives the cleanup. It is compiled once here rather
// than once per input line.
var nonWord = regexp.MustCompile(`\W`)

// main checks the command line (IN_FILE STOPWORD_FILE OUT_FILE) and runs the
// preprocessing pipeline. Any failure is reported on stderr and the process
// exits with status 1.
func main() {
	if len(os.Args) < 4 {
		fmt.Fprintln(os.Stderr, "Too few arguments. Usage: preprocess IN_FILE STOPWORD_FILE OUT_FILE")
		os.Exit(1)
	}
	if err := run(os.Args[1], os.Args[2], os.Args[3]); err != nil {
		fmt.Fprintln(os.Stderr, err)
		os.Exit(1)
	}
}

// run opens the input file, loads the stopword set, creates the output file
// (in that order, so a bad input or stopword path never creates an output
// file) and writes the filtered lines. It returns the first error it meets,
// including a failure to close the output file.
func run(inPath, stopwordPath, outPath string) (err error) {
	in, err := os.Open(inPath)
	if err != nil {
		return err
	}
	// Read-only handle: a Close failure cannot lose data, so it is not reported.
	defer in.Close()

	stopwords, err := readStopwords(stopwordPath)
	if err != nil {
		return err
	}

	out, err := os.Create(outPath)
	if err != nil {
		return err
	}
	// Surface the Close error of the written file unless an earlier error
	// is already being returned.
	defer func() {
		if cerr := out.Close(); cerr != nil && err == nil {
			err = cerr
		}
	}()

	w := bufio.NewWriter(out)
	if err := filterLines(bufio.NewReader(in), w, stopwords); err != nil {
		return err
	}
	return w.Flush()
}

// readStopwords reads one stopword per line from the file at path and returns
// them as a set. Surrounding whitespace is trimmed and blank lines are
// skipped. Words are stored exactly as written; they are later compared
// against lowercased tokens.
func readStopwords(path string) (map[string]bool, error) {
	f, err := os.Open(path)
	if err != nil {
		return nil, err
	}
	defer f.Close()

	stopwords := make(map[string]bool)
	r := bufio.NewReader(f)
	for {
		line, err := r.ReadString('\n')
		if err != nil && err != io.EOF {
			return nil, fmt.Errorf("reading stopwords from %s: %v", path, err)
		}
		if word := strings.TrimSpace(line); word != "" {
			stopwords[word] = true
		}
		if err == io.EOF {
			return stopwords, nil
		}
	}
}

// filterLines reads r line by line and writes the surviving words of each
// line to w as one comma-separated line. Lines with no surviving words
// produce no output. The caller is responsible for flushing w.
func filterLines(r *bufio.Reader, w *bufio.Writer, stopwords map[string]bool) error {
	for {
		line, readErr := r.ReadString('\n')
		if readErr != nil && readErr != io.EOF {
			return readErr
		}
		// At EOF, line still holds any unterminated final line, so it is
		// processed before the loop stops.
		if kept := keepWords(line, stopwords); len(kept) > 0 {
			if _, err := w.WriteString(strings.Join(kept, ",") + "\n"); err != nil {
				return err
			}
		}
		if readErr == io.EOF {
			return nil
		}
	}
}

// keepWords replaces every non-word character in line with a space,
// lowercases the result, splits it on whitespace and returns the tokens that
// are not in stopwords.
func keepWords(line string, stopwords map[string]bool) []string {
	cleaned := strings.ToLower(nonWord.ReplaceAllString(line, " "))
	var kept []string
	for _, word := range strings.Fields(cleaned) {
		if !stopwords[word] {
			kept = append(kept, word)
		}
	}
	return kept
}