#!/usr/bin/env python import sys if len(sys.argv) == 2: nbuckets = sys.argv[1] else: nbuckets = 5 print "" % nbuckets data = sys.stdin.read().split() tags = {} for i in data: tags.setdefault(i, 0) tags[i] += 1 # Remove tags that have only 1 hit ignorable = [x for x in tags if tags[x] == 1] [tags.pop(x) for x in ignorable] low, high = (0, 0) for tag, hits in tags.iteritems(): low = min(low, hits) high = max(high, hits) if low < 2: low = 2 hitrange = high - low interval = int(hitrange / nbuckets) print "" % (high, low, interval) buckets = [] for i in xrange(low, high + interval, interval): bucket_tags = [] for tag, hits in tags.iteritems(): if hits <= i: bucket_tags.append(tag) [tags.pop(tag) for tag in bucket_tags] buckets.append(bucket_tags) # Collapse buckets that are empty or have only 1 entry. for i in xrange(len(buckets)): collapse = -1 for j, bucket in enumerate(buckets): if len(bucket) < 2: collapse = j if collapse >= 0: #print "Pop: %d len %d" % (collapse, len(buckets)) buckets[collapse - 1].extend(buckets.pop(collapse)) cloud = {} for i, bucket in enumerate(buckets): for tag in bucket: cloud[tag] = i # Generate the html import random class_name = "cloud_%04x" % (int(random.random() * (1<<16))) def genlink(tag): return '%s' % (tag, tag) BASE_FONT_SIZE = 8 print '