#!/usr/bin/env python3

import os
import re
import sys
import zlib

# Minimum on-disk size, in bytes, a file must have to be kept by filter_files().
MIN_SIZE = 512

def filter_files(files):
    """Return the paths from *files* whose size is at least MIN_SIZE bytes.

    Input order is preserved.  Sizes come from ``os.path.getsize``, so a
    path that cannot be stat'ed raises ``OSError``.
    """
    return [path for path in files if not os.path.getsize(path) < MIN_SIZE]

def pairs(ls):
    """Return every distinct ``(a, b)`` with ``a < b`` drawn from *ls*.

    The result is a dict keys view, so repeated elements in *ls* do not
    produce repeated pairs, and pairs appear in first-encounter order of
    the nested scan (outer element first, inner element second).
    """
    ordered = ((lo, hi) for lo in ls for hi in ls if lo < hi)
    # A dict is used purely as an insertion-ordered set of pairs.
    return dict.fromkeys(ordered).keys()

def snarf_file(path, cache={}):
    """Return the text of *path* with every non-letter run collapsed to a space.

    Each maximal run of characters outside ``a-zA-Z`` is replaced by a
    single space.  Results are memoized per path in *cache*.

    Args:
        path: Path of the file to read.
        cache: Mapping of path -> normalized text.  The mutable default is
            deliberate: it is shared across calls and acts as the memo
            store, so each file is read at most once per process.

    Returns:
        The normalized file content as a ``str``.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    if path not in cache:
        # Decode explicitly as UTF-8 and replace undecodable bytes instead of
        # relying on the locale encoding: anything that is not an ASCII
        # letter becomes a space below anyway, so a stray byte must not
        # abort the whole comparison with UnicodeDecodeError.
        with open(path, 'r', encoding='utf-8', errors='replace') as fh:
            raw = fh.read()
        cache[path] = re.sub(r'[^a-zA-Z]+', ' ', raw)
    return cache[path]

def compress_string(str):
    """Return the zlib-compressed bytes of *str* encoded as UTF-8."""
    # NOTE: the parameter name shadows the builtin but is kept for
    # compatibility with keyword callers.
    encoded = str.encode("utf-8")
    return zlib.compress(encoded)

def do_compare(files):
    """Score how much better *files* compress together than separately.

    Every file is loaded through ``snarf_file``.  The score is the sum of
    the individually compressed sizes divided by the smaller of the two
    compressed concatenations (original order and reversed order),
    minus one.  A larger score means the concatenation compressed better
    relative to the separate files.
    """
    texts = [snarf_file(name) for name in files]

    separate_total = 0
    for text in texts:
        separate_total += len(compress_string(text))

    forward_size = len(compress_string(''.join(texts)))
    backward_size = len(compress_string(''.join(reversed(texts))))
    best_joined = min(forward_size, backward_size)

    return separate_total / best_joined - 1

if __name__ == "__main__":
    # Score every pair of sufficiently large files named on the command
    # line, then print the (pair, score) tuples from lowest to highest score.
    candidates = filter_files(sys.argv[1:])
    scored = [(duo, do_compare(duo)) for duo in pairs(candidates)]

    for entry in sorted(scored, key=lambda entry: entry[1]):
        print(entry)
