#!/usr/bin/env python

"""
SYNOPSIS

    histogram [-h] [-v,--verbose] [--version] [-s, --sort] [-n, --nozero] {FILENAME}

DESCRIPTION

    This generates a histogram of the bytes in a file.
    If no FILENAME is given or if the FILENAME is '-' then
    this will read from the standard input.

    This docstring will be printed by the script if there is an error or
    if the user requests help (-h or --help).

EXAMPLES

    $ dd if=/dev/urandom of=random.bin bs=1 count=10000
    $ histogram random.bin

    $ dd if=/dev/urandom bs=1 count=10000 | histogram

    $ dd if=/dev/urandom bs=1 count=10000 | histogram --sort

    $ dd if=/dev/urandom bs=1 count=10000 | histogram --sort --nozero

EXIT STATUS

    This exits with status 0 on success and 1 otherwise.
    This exits with a status greater than 1 if there was an
    unexpected run-time error.

AUTHOR

    Noah Spurrier <noah@noah.org>

LICENSE

    This license is OSI and FSF approved as GPL-compatible.
    This license is identical to the ISC License and is registered with and
    approved by the Open Source Initiative. For more information visit:
        http://opensource.org/licenses/isc-license.txt

    Copyright (c) 2013, Noah Spurrier

    Permission to use, copy, modify, and/or distribute this software for any
    purpose with or without fee is hereby granted, provided that the above
    copyright notice and this permission notice appear in all copies.

    THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
    WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
    MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
    ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
    WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
    ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
    OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.

VERSION

    Version 2
"""

import sys
import os
import traceback
import optparse
import time
import string

# Human-readable label for each of the 256 possible byte values, indexed by
# the byte value itself. main() prints the label next to each byte's count.
ASCII_chart = [
    # 0-31: control characters, shown as caret notation plus mnemonic.
    '^@ NULL',
    '^A SOH', '^B STX', '^C ETX', '^D EOT', '^E ENQ', '^F ACK',
    '^G BEL', '^H BS', '^I HT', '^J LF', '^K VT', '^L FF', '^M CR',
    '^N SO', '^O SI', '^P DLE', '^Q DC1', '^R DC2', '^S DC3', '^T DC4',
    '^U NAK', '^V SYN', '^W ETB', '^X CAN', '^Y EM', '^Z SUB',
    # The backslash is escaped explicitly: a bare '\ ' is an invalid escape
    # sequence that newer Python versions warn about.
    '^[ ESC', '^\\ FS, IS4', '^] GS, IS3', '^^ RS, IS2', '^_ US, IS1',
    # 32-126: printable characters.
    'SPACE',
    '!', '"', '#', '$', '%', '&', "'", '(', ')', '*', '+', ',', '-', '.', '/',
    '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
    ':', ';', '<', '=', '>', '?', '@',
    'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J',
    'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T',
    'U', 'V', 'W', 'X', 'Y', 'Z',
    '[', '\\', ']', '^', '_', '`',
    'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j',
    'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't',
    'u', 'v', 'w', 'x', 'y', 'z',
    '{', '|', '}', '~',
    # 127: the last ASCII code.
    'DEL',
] + [''] * 128  # 128-255: outside ASCII, so no label.

def main(options, args):
    """Print a histogram of the byte values found in a file or on stdin.

    The input is read as raw bytes, so every value counted is in 0..255
    regardless of the platform's text encoding or newline conventions.

    Args:
        options: Object with boolean attributes ``sort`` (order the rows by
            descending count) and ``nozero`` (omit bytes that never occur).
        args: Positional command-line arguments. ``args[0]``, if present, is
            the file to read; a missing argument or ``'-'`` selects stdin.

    Returns:
        None. The report is written to standard output.

    Raises:
        IOError/OSError: If the named file cannot be opened or read.
    """
    # Use stdin if no filename is given or if filename is '-'.
    filename = args[0] if args else '-'
    use_stdin = (filename == '-')

    if use_stdin:
        # Where sys.stdin is a text wrapper (Python 3) the raw byte stream
        # is its .buffer attribute; otherwise fall back to sys.stdin itself.
        fin = getattr(sys.stdin, 'buffer', sys.stdin)
    else:
        # Binary mode: a byte histogram must not decode or translate input.
        fin = open(filename, 'rb')

    try:
        if use_stdin:
            print('# filename: None -- using stdin')
        else:
            print('# filename: %s' % filename)
        # Iterating a bytearray yields integers on both Python 2 and 3.
        data = bytearray(fin.read())
    finally:
        # Only close what this function opened; stdin belongs to the caller.
        if not use_stdin:
            fin.close()

    histogram = [0] * 256
    for byte_value in data:
        histogram[byte_value] += 1

    # list.index() returns the first match, so ties go to the lowest byte.
    index_of_max_count = histogram.index(max(histogram))
    index_of_min_count = histogram.index(min(histogram))

    print('# max count byte %3d: %d'
          % (index_of_max_count, histogram[index_of_max_count]))
    print('# min count byte %3d: %d'
          % (index_of_min_count, histogram[index_of_min_count]))
    print('#')
    print('# byte  count  "ASCII chart"')

    if options.sort:
        # The sort is stable, so bytes with equal counts stay in ascending
        # byte order even with reverse=True.
        row_order = sorted(range(256), key=histogram.__getitem__,
                           reverse=True)
    else:
        row_order = range(256)

    for ii in row_order:
        count = histogram[ii]
        if options.nozero and count == 0:
            continue
        print('%3d %9d  %s' % (ii, count, ASCII_chart[ii]))


if __name__ == '__main__':
    # Script entry point: parse the command line, run main(), and turn the
    # outcome into a process exit status (main()'s return value, with None
    # meaning 0; 2 for an unexpected exception).
    try:
        start_time = time.time()
        parser = optparse.OptionParser(
            formatter=optparse.TitledHelpFormatter(),
            usage=__doc__,
            version='2')
        parser.add_option('-v', '--verbose', action='store_true',
                          default=False, help='verbose output')
        parser.add_option('-s', '--sort', action='store_true',
                          default=False, help='sort output by count')
        parser.add_option('-n', '--nozero', action='store_true',
                          default=False, help='do not print bytes with zero count')
        (options, args) = parser.parse_args()
        # No positional argument is required: main() falls back to stdin.
        if options.verbose:
            print(time.asctime())
        exit_code = main(options, args)
        if exit_code is None:
            exit_code = 0
        if options.verbose:
            print(time.asctime())
            print('TOTAL TIME IN MINUTES: %f'
                  % ((time.time() - start_time) / 60.0))
        sys.exit(exit_code)
    except KeyboardInterrupt:  # The user pressed Ctrl-C.
        # Bare raise keeps the original traceback intact.
        raise
    except SystemExit:  # The script called sys.exit() somewhere.
        raise
    except Exception as e:
        print('ERROR: Unexpected Exception')
        print(str(e))
        traceback.print_exc()
        # sys.exit() rather than os._exit(): the latter skips flushing stdio
        # buffers, which can drop the messages above when output is piped.
        sys.exit(2)

# vim:set ft=python fileencoding=utf-8 sr et ts=4 sw=4 : See help 'modeline'
