Skip to content

Instantly share code, notes, and snippets.

@jelsas
Created April 23, 2010 18:19

Revisions

  1. jelsas revised this gist Apr 24, 2010. 1 changed file with 1 addition and 2 deletions.
    3 changes: 1 addition & 2 deletions format_yahoo_letor_submission.py
    Original file line number Diff line number Diff line change
    @@ -7,9 +7,8 @@
    # The PREDICTIONS_FILE should just be a list of scores,
    # one per line, corresponding to the lines in INPUT_FILE

    from itertools import izip, groupby, count
    from itertools import izip, groupby
    from optparse import OptionParser
    import sys

    parser = OptionParser(
    usage='usage: %prog [options] PREDICTIONS_FILE INPUT_FILE')
  2. jelsas created this gist Apr 23, 2010.
    49 changes: 49 additions & 0 deletions format_yahoo_letor_submission.py
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,49 @@
    #!/usr/bin/python

    # This script takes the predictions and input vectors
    # (eg. set1.test.txt) and produces a file suitable for
    # submission to the Yahoo LETOR Challenge web interface.
    #
    # The PREDICTIONS_FILE should just be a list of scores,
    # one per line, corresponding to the lines in INPUT_FILE

    from itertools import izip, groupby, count
    from optparse import OptionParser
    import sys

    # Command-line interface: exactly two positional arguments are expected.
    parser = OptionParser(usage='usage: %prog [options] PREDICTIONS_FILE INPUT_FILE')
    options, args = parser.parse_args()

    # Unpacking raises ValueError unless args holds exactly two entries.
    try:
        preds_file, input_file = args
    except ValueError:
        parser.error('Must specify PREDICTIONS_FILE and INPUT_FILE')

    # Lazily read one score per line of preds_file (first whitespace-separated
    # token) and one qid per line of input_file (second token).
    scores = (float(line.split(None, 1)[0]) for line in open(preds_file))
    qids = (line.split(None, 2)[1] for line in open(input_file))

    # Sentinel returned by next() once the scores iterator is exhausted.
    _NO_SCORE = object()

    def _qid_score_pairs():
        """Yield (qid, score) pairs, failing as soon as the scores run out."""
        # izip(qids, scores) would pull a qid *before* noticing that the
        # scores are exhausted and silently drop it, so an input file exactly
        # one line longer than the predictions file would go undetected.
        # Leftover scores (the opposite mismatch) remain in the `scores`
        # iterator once qids is exhausted.
        for qid in qids:
            score = next(scores, _NO_SCORE)
            if score is _NO_SCORE:
                parser.error('scores length != qids length')
            yield qid, score

    # Group consecutive (qid, score) pairs that share the same qid.
    data = groupby(_qid_score_pairs(), lambda x: x[0])

    # go through each query
    for (q, q_scores) in data:
        q_scores = list(q_scores)
        # indices of this query's entries, ordered by descending score
        sorted_perm = sorted(range(len(q_scores)),
                             reverse=True,
                             key=lambda i: q_scores[i][1])
        # Invert the permutation in a single pass: predicted_ranks[i] is the
        # 1-based rank of the i-th entry of this query.  (Looking each index
        # up with list.index() would be quadratic in the query size.)
        predicted_ranks = [0] * len(sorted_perm)
        for position, doc_index in enumerate(sorted_perm):
            predicted_ranks[doc_index] = position + 1
        # Parenthesised single-argument print is valid on Python 2 and 3.
        print(' '.join(str(rank) for rank in predicted_ranks))

    # make sure we got to the end of the scores & qids iterators
    # both of these should raise StopIteration
    def check_iter_at_end(iter):
        """Report a usage error unless ``iter`` is already exhausted.

        Pulls one item from ``iter`` (consuming it if one is available).  If
        an item was available, the mismatch is reported through
        ``parser.error``; if the iterator raises StopIteration, nothing
        happens and None is returned.
        """
        # The builtin next() works on Python 2.6+ and Python 3 iterators,
        # whereas the ``.next()`` method exists only on Python 2 iterators.
        try:
            next(iter)
        except StopIteration:
            pass
        else:
            parser.error('scores length != qids length')

    # Verify, scores first and then qids, that neither iterator has items left.
    for remaining in (scores, qids):
        check_iter_at_end(remaining)