#!/usr/bin/env python

import sys
import optparse
import string
from hostlist import collect_hostlist

# Command-line interface.  The data itself arrives on stdin, so the script
# takes options only and no positional arguments.
optp = optparse.OptionParser(usage="usage: %prog [options] < DATA")
optp.add_option("-k", "--key", action="store", type="int", default=None,
                help="use data at position KEY (default: auto)")
optp.add_option("-n", "--nbuckets", action="store", type="int", default=5,
                help="number of buckets to use (default: %default)")
optp.add_option("-v", "--verbose", action="store_true", default=False)

# parse_args() processes sys.argv[1:] when called without arguments.
opts, args = optp.parse_args()

# Any leftover positional argument is a usage error.
if len(args) > 0:
    optp.print_help()
    sys.exit(1)

# Read the whole input up front; it is scanned several times further down.
data = sys.stdin.readlines()

if opts.verbose:
    print("Info: read in %i lines of data" % len(data))

# Blank lines carry no columns and would break every column lookup below,
# so drop them before doing anything else.
data = [line for line in data if line.strip()]

if not data:
    print("Error: no input data")
    sys.exit(1)

if not opts.key:
    # No key column requested: inspect the first line and use the first
    # column after column 0 (the node name) that parses as an integer.
    sline = data[0].split()
    if len(sline) < 2:
        print("Error: input data has only one column")
        sys.exit(1)
    for i in range(1, len(sline)):
        try:
            int(sline[i])
        except ValueError:
            continue
        # Any integer qualifies, including 0 (a plain truth test on the
        # converted value would wrongly skip a column whose value is 0).
        if opts.verbose:
            print("Info: auto-detect found data at column %i" % i)
        opts.key = i
        break

    if not opts.key:
        print("Error: no key given and/or autodetect failure")
        sys.exit(1)

# Collect the key column of every input line as an integer.
valuelist = []
for line in data:
    sline = line.strip().split()
    try:
        valuelist.append(int(sline[opts.key]))
    except (IndexError, ValueError):
        # Either the line is too short to have the key column or the field
        # is not an integer; report it instead of dying with a traceback.
        print("Error: no integer value at column %i in line: %s" % (
            opts.key, line.strip()))
        sys.exit(1)

# Without any value there is no range to split into buckets.
if not valuelist:
    print("Error: no input data")
    sys.exit(1)

valuelist.sort()

# After sorting, the extremes sit at both ends of the list.
minvalue = valuelist[0]
maxvalue = valuelist[-1]

if opts.verbose:
    print("Info: smallest value found was %i" % minvalue)
    print("Info: largest value found was %i" % maxvalue)

# Number of integers in the inclusive range [minvalue, maxvalue].
nval = maxvalue - minvalue + 1

# At least one bucket is required, and never more buckets than distinct
# integers in the range.
if not (1 <= opts.nbuckets <= nval):
    print("Error: number of buckets(%i) not suitable for number of values in data(%i)" % (
        opts.nbuckets, nval))
    sys.exit(1)

if opts.verbose:
    print("Info: will use %i buckets" % opts.nbuckets)

# Values per bucket; this may be fractional, bucket ends are rounded below.
nvpb = nval / float(opts.nbuckets)

# Each bucket is a dict holding its inclusive 'start'/'end' limits and the
# list of 'nodes' assigned to it.  A bucket starts right after the end of
# the previous one.
buckets = []
start = minvalue
for b in range(opts.nbuckets):
    end = int(round((b + 1) * nvpb + (minvalue - 1)))
    buckets.append({'start': start, 'end': end, 'nodes': []})
    start = end + 1

# Map every value to the list of nodes that reported it, in input order.
valuedict = {}
for line in data:
    fields = line.split()
    val = int(fields[opts.key])
    # The node name is the first column, minus any surrounding colons.
    node = fields[0].strip(':')
    valuedict.setdefault(val, []).append(node)

# Distribute the nodes over the buckets.  Only values that actually occur in
# the input are visited, so the cost depends on the amount of data rather
# than on the width of the value range.  Visiting them in ascending order
# keeps the nodes of each bucket ordered by value, then by input order.
sorted_values = sorted(valuedict)
for bucket in buckets:
    for value in sorted_values:
        if bucket['start'] <= value <= bucket['end']:
            bucket['nodes'].extend(valuedict[value])

# Largest number of nodes found in a single bucket (at least 1).
maxnperbucket = max([1] + [len(bucket['nodes']) for bucket in buckets])

# Column widths: wide enough for the longest number printed in the column
# and never narrower than the 3-character header labels.  Every bucket limit
# lies between minvalue and maxvalue, so checking both extremes is enough;
# minvalue matters when it is negative and therefore longer than maxvalue.
ncharvalue = max(len(str(minvalue)), len(str(maxvalue)), 3)
ncharnodecnt = max(len(str(maxnperbucket)), 3)

if opts.verbose:
    # Blank separator line, column header and ruler.
    print("")
    print("%sLOW-%sHI: %sCNT  HOSTLIST" % (" " * (ncharvalue - 3),
                                           " " * (ncharvalue - 2),
                                           " " * (ncharnodecnt - 3)))
    print("-" * 80)
    print "-" * 80

# Main output: one "LOW-HI: CNT  HOSTLIST" row per bucket, with every number
# right-aligned in its column by left-padding it with spaces.
for bucket in buckets:
    low = bucket['start']
    high = bucket['end']
    count = len(bucket['nodes'])
    pads = [" " * (width - len(str(number)))
            for number, width in ((low, ncharvalue),
                                  (high, ncharvalue),
                                  (count, ncharnodecnt))]
    print("%s%i-%s%i: %s%i  %s" % (pads[0], low,
                                   pads[1], high,
                                   pads[2], count,
                                   collect_hostlist(bucket['nodes'])))
