#! /usr/bin/env python

import os
import os.path
import shutil



# Parameters...
# Names of the problems; a directory is picked up when its name starts with
# a problem name immediately followed by the 'post' suffix.
problems = ['shuttle', 'gait', 'digits']
post = '_results'

# File-name prefixes used to tell which approach produced each .csv file.
approaches = ['p_wrong_soft', 'p_wrong_hard', 'entropy', 'outlier', 'random']
# Number of data rows a file must contain to be included in the averages;
# also the number of rows written to each averaged output file.
queries = 200



# Scan the current directory for result folders: every sub-directory whose
# name begins with '<problem><post>' is recorded against that problem (the
# first matching problem wins)...
data_dirs = dict()
for entry in os.listdir('.'):
  if not os.path.isdir(entry):
    continue
  owner = next((p for p in problems if entry.startswith(p + post)), None)
  if owner is not None:
    data_dirs[entry] = owner
print('Found %i directories to process' % len(data_dirs))



# Collect the .csv files inside each result directory, keyed by their path,
# keeping only those whose name starts with one of the known approaches
# (the first matching approach wins)...
data_files = dict()
for folder, problem in data_dirs.items():
  for name in os.listdir(folder):
    if not name.endswith('.csv'):
      continue
    matched = [alg for alg in approaches if name.startswith(alg)]
    if matched:
      data_files[folder+'/'+name] = (problem, matched[0])
print('Found %i files to process' % len(data_files))



# Open each file and extract the soft, juicy results...
# data_classes / data_inlier map (problem, algorithm) to a list of runs, where
# each run is the list of values read from one file (second column as int,
# third column as float).
data_classes = dict()
data_inlier = dict()

for fn, key in data_files.items():
  classes = []
  inlier = []

  # 'with' guarantees the handle is closed, even if a row fails to parse...
  with open(fn, 'r') as f:
    f.readline() # Skip the header row.

    for line in f:
      if not line.strip(): continue # Tolerate blank lines, e.g. at the end of the file.

      # Exactly four comma separated fields are expected; only the middle two are used...
      _, cl, il, _ = line.split(',')
      classes.append(int(cl))
      inlier.append(float(il))

  # Only runs with exactly the expected number of rows are kept...
  if len(classes)==queries:
    data_classes.setdefault(key, []).append(classes)
    data_inlier.setdefault(key, []).append(inlier)

print('Found %i problem/algorithm combos' % len(data_classes))



# Make an output directory, discarding anything left over from a previous run...
try:
  shutil.rmtree('results')
except OSError:
  # Usually means there was nothing to remove; if the directory exists but
  # could not be removed then the mkdir below will report the problem.
  pass
os.mkdir('results')



# Generate averages, write them out...
def _column_means(rows, width):
  """Return the per-column mean of rows, as a list of width floats.

  The mean is updated incrementally, one row at a time, using
  mean += (value - mean) / n. If rows is empty the result is all 0.0.
  """
  means = [0.0] * width
  for n, row in enumerate(rows, 1):
    # means always holds floats, so the division below is a float division
    # even when the row itself contains integers.
    means = [m + (x - m) / n for m, x in zip(means, row)]
  return means


for (problem, alg), classes in data_classes.items():
  inlier = data_inlier[problem, alg]
  print('%s: %s: %i samples' % (problem, alg, len(classes)))

  # Average each query position over all the runs for this combination...
  classes_avg = _column_means(classes, queries)
  inlier_avg = _column_means(inlier, queries)

  # Write to file; 'with' closes it even if a write fails part way through...
  with open('results/%s_%s.csv' % (problem, alg), 'w') as out:
    out.write('queries, classes, inliers\n')
    for qu, (cl, il) in enumerate(zip(classes_avg, inlier_avg), 1):
      out.write('%i, %.3f, %.4f\n' % (qu, cl, il))

