#! /usr/bin/env python3

import argparse
import asyncio
import csv
import datetime
import os
import sys
import time



# Command line interface: optional switches first, then the positional paths...
parser = argparse.ArgumentParser(
  description='Runs an automarking script on every submission within a decompressed Moodle archive and generates a spreadsheet of marks.',
)

parser.add_argument(
  '-e', '--extension',
  default='.ipynb',
  help='The extension of the file it is to extract from each students directory and feed to the auto marking script (if multiple files match it runs the script on all, and selects the highest scoring).',
)
parser.add_argument(
  '-t', '--timeout',
  type=float,
  default=0.25,
  help='Maximum runtime of any students script, in hours. Defaults to 0.25, i.e. 15 minutes.',
)
parser.add_argument(
  '-l', '--limit',
  type=int,
  default=None,
  help='Maximum number of processes to run at any given moment.',
)

parser.add_argument(
  'directory',
  help='Directory as downloaded from Moodle, where every subdirectory is a different student.',
)
parser.add_argument(
  'script',
  help='The script that outputs the mark (stdout) when run on a script file.',
)

parser.add_argument(
  'output',
  nargs='?',
  default=None,
  help='Output csv file that contains all of the marks. Defaults to the same as the script file but with a csv extension.',
)

args = parser.parse_args()

# With no explicit output the csv shares its stem with the marking script...
if args.output is None:
  script_stem = os.path.splitext(args.script)[0]
  args.output = script_stem + '.csv'

# The log file always shares its stem with the csv...
output_stem = os.path.splitext(args.output)[0]
args.log = output_stem + '.log'


# Bug out if output exists, rather than overwrite an earlier set of marks...
if os.path.exists(args.output):
  print('Critical: Output file already exists')
  sys.exit(1)


# Automatic CPU core count - os.cpu_count() returns None when the count cannot
# be determined, in which case fall back to one process at a time...
if args.limit is None:
  args.limit = os.cpu_count() or 1

# A limit below one would leave the scheduler waiting on an empty set of jobs...
if args.limit < 1:
  print('Critical: Process limit must be at least 1')
  sys.exit(1)



# Get contents of directory...
# NOTE: if several directories carry the same id the last one scanned wins.
sdirs = {} # Student id -> path

prefix = 'Participant_'
for de in os.scandir(args.directory):
  if not (de.is_dir() and de.name.startswith(prefix)):
    continue

  # The id is whatever sits between the prefix and the next underscore (or the
  # end of the name when there is no further underscore)...
  sid_text = de.name[len(prefix):].partition('_')[0]

  try:
    sid = int(sid_text)

  except ValueError:
    # One oddly named directory should not abort the whole marking run...
    print('Warning: Skipping directory with unparsable participant id: {}'.format(de.name))
    continue

  sdirs[sid] = de.path

print('Info: Found {} directories of potential coursework'.format(len(sdirs)))



# Global to record the maximum mark for each question, shared by every
# Coursework; None means different logs disagreed about that maximum...
max_mark = {}



# Class for storing results in...
class Coursework:
  """Represents the marks assigned to a piece of coursework.

  The automarker log is read one 'Kind: message' line at a time:
    Question: <name>              starts a question; later marks belong to it.
    Mark: <mark> / <max>          a definite mark for the current question.
    Mark: <low>--<high> / <max>   a mark range for the current question.
    Info/Warning/Severe/Critical  a diagnostic; the most severe one is kept.

  Lines without a colon, malformed Mark lines and Mark lines that appear before
  any Question are reported as parse errors and skipped. Lines with any other
  kind are ignored silently."""
  badness = {'' : 0, 'Info' : 1, 'Warning' : 2, 'Severe' : 3, 'Critical' : 4}

  def __init__(self, sid, fn, log, runtime):
    """Initialised with the student id, then the filename, followed by the log from running the script (one big string) and the runtime in seconds.

    After construction marks_low/marks_high map question name to the low/high mark (None until a Mark is seen), maximum is the sum of the maximums seen, and worst/worst_msg hold the most severe diagnostic kind and its full line."""
    self.sid = sid
    self.fn = fn
    self.runtime = runtime

    # Blank lines carry no information, so drop them up front...
    self.log = [line.strip() for line in log.split('\n') if line.strip()]

    self.marks_low = dict()
    self.marks_high = dict()
    self.maximum = 0
    self.worst = ''
    self.worst_msg = None

    question = None
    for line in self.log:
      kind, sep, msg = line.partition(':')
      if not sep:
        print('Warning: Parse error:', line)
        continue

      if kind=='Question':
        question = msg.strip()
        self.marks_low[question] = None
        self.marks_high[question] = None

      elif kind=='Mark':
        # Parse everything before storing anything, so a bad line cannot leave
        # a half recorded mark or escape as an exception. A mark with no
        # question to attach to is rejected as well...
        parsed = None if question is None else self._parse_mark(msg)
        if parsed is None:
          print('Warning: Parse error:', line)
          continue

        low, high, out_of = parsed
        self.marks_low[question] = low
        self.marks_high[question] = high

        # First sighting sets the maximum; a later disagreement poisons it...
        if question not in max_mark:
          max_mark[question] = out_of

        elif max_mark[question] is not None and abs(max_mark[question] - out_of) > 1e-3:
          max_mark[question] = None

        self.maximum += out_of

      elif kind in self.badness:
        if self.badness[kind] > self.badness[self.worst]:
          self.worst = kind
          self.worst_msg = line


  @staticmethod
  def _parse_mark(msg):
    """Parses the text after 'Mark:', returning (low, high, maximum) as floats, or None if it is not of the form 'mark / max' or 'low--high / max'."""
    parts = [part.strip() for part in msg.split('/')]
    if len(parts) < 2:
      return None

    try:
      if '--' in parts[0]:
        low, high = (float(value) for value in parts[0].split('--'))

      else:
        low = high = float(parts[0])

      out_of = float(parts[1])

    except ValueError:
      return None

    return low, high, out_of


  def questions(self):
    """Yields the name of every question seen, in log order."""
    yield from self.marks_low


  def total(self):
    """Returns the total marks, as (low, high); questions without a mark count as zero."""
    low = sum((mark for mark in self.marks_low.values() if mark is not None), 0.0)
    high = sum((mark for mark in self.marks_high.values() if mark is not None), 0.0)

    return low, high


  @staticmethod
  def _range_str(low, high):
    """Formats a mark, as a single number when low and high agree and as 'low--high' otherwise."""
    if abs(low - high) < 1e-3:
      return '{:g}'.format(high)

    return '{:g}--{:g}'.format(low, high)


  def total_str(self):
    """Like total but returns a string, where it shows a range if low/high diverge."""
    return self._range_str(*self.total())


  def question_str(self, q):
    """Returns the relevant string for a specific question; empty if the question is unknown or has no mark."""
    if self.marks_low.get(q) is None:
      return ''

    return self._range_str(self.marks_low[q], self.marks_high[q])


# List of results - one Coursework per student, appended by judgement() as each
# student finishes; only sorted into student id order just before output...
results = [] 



# Helper for the marking code - walks a directory tree looking for submissions...
def search_dir(path):
  """Yields the path of every file under path, at any depth, whose name ends with args.extension."""
  for entry in os.scandir(path):
    # Descend into subdirectories first; nothing else applies to them...
    if entry.is_dir():
      yield from search_dir(entry.path)
      continue

    if entry.is_file() and entry.name.endswith(args.extension):
      yield entry.path



# Async function to run a students work...
async def judgement(sid, path):
  """Runs the automarker on every matching file under path and records the best result for student sid.

  Each file found by search_dir is marked in its own 'python3 <script> <file>' process, limited to args.timeout hours. Timeouts, kills and non-zero exit codes are written into the log as 'Severe:' lines so that Coursework records them; these lines accumulate across the files of one student. The submission with the highest low total is appended to the module level results list; if no file was found a placeholder with worst set to 'No file' is appended instead."""
  coursework = None
  ext_log = []

  for fn in search_dir(path):
    start = time.monotonic()
    process = await asyncio.create_subprocess_exec('python3', args.script, fn, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE)

    # Stays empty if the run times out before its output is collected...
    log = b''
    try:
      log, _ = await asyncio.wait_for(process.communicate(), timeout=args.timeout * 60.0 * 60.0)

    except asyncio.TimeoutError:
      ext_log.append('Severe: Student {} took too long'.format(sid))
      print(ext_log[-1])

    # No return code means it is still running (timed out), so put it down...
    if process.returncode is None:
      ext_log.append('Severe: Killed process')
      print(ext_log[-1])
      process.kill()
      await process.wait()

    if process.returncode!=0:
      ext_log.append('Severe: Student {} - automarker with code {}'.format(sid,
 process.returncode))
      print(ext_log[-1])

    # Output is not guaranteed to be valid utf8; replace bad bytes rather than
    # let a decode error abort marking for this student...
    log = log.decode('utf8', errors='replace')
    if ext_log:
      log = '\n'.join([log] + ext_log)

    end = time.monotonic()
    cw = Coursework(sid, fn, log, end - start)

    if coursework is not None:
      best_low, best_high = coursework.total()
      new_low, new_high = cw.total()
      if best_high < new_high and best_low > new_low:
        print('Warning: Multiple submissions; keeping one with highest minimum mark when another with a higher maximum mark exists')

    if coursework is None or coursework.total()[0] < cw.total()[0]:
      coursework = cw

  # Every file that is run produces a Coursework, so None can only mean that
  # there was nothing to run...
  if coursework is None:
    print('Severe: Could not find valid file for student {}'.format(sid))

    coursework = Coursework(sid, os.path.join(path, '*'), '', 0.0)
    coursework.worst = 'No file'
    results.append(coursework)
    return

  results.append(coursework)
  print('Info: Marked {}, obtained {}'.format(sid, coursework.total_str()))
  if coursework.worst_msg is not None:
    print('Error Example: {}'.format(coursework.worst_msg))



# Helper for court - surfaces failures that would otherwise vanish with the task...
def _report_failures(done, owners):
  """Prints a message for every task in done that ended with an exception; owners maps task -> student id."""
  for task in done:
    sid = owners.pop(task, '?')
    if task.cancelled():
      continue

    error = task.exception()
    if error is not None:
      print('Severe: Marking student {} failed with {!r}'.format(sid, error))



# Code to run all subprocesses for all students...
async def court():
  """Marks every student in sdirs, keeping at most args.limit judgement tasks in flight.

  Each judgement coroutine is wrapped in a task as soon as it is created, so it starts running immediately and can be handed to asyncio.wait (which does not accept bare coroutines on current Python versions). Exceptions raised inside a task are reported rather than silently dropped."""
  running = set()
  owners = {} # Task -> student id, for error reporting

  for itr, (sid, path) in enumerate(sdirs.items()):
    # At capacity - block until at least one slot frees up...
    if len(running)>=args.limit:
      done, running = await asyncio.wait(running, return_when=asyncio.FIRST_COMPLETED)
      _report_failures(done, owners)

    now = datetime.datetime.now().isoformat(' ', 'seconds')
    print('Info: Starting student {} at {} ({} remain)'.format(sid, now, len(sdirs) - itr - 1))

    task = asyncio.ensure_future(judgement(sid, path))
    owners[task] = sid
    running.add(task)

  # asyncio.wait refuses an empty set, which happens when no students were found...
  if running:
    done, _ = await asyncio.wait(running)
    _report_failures(done, owners)



# Do the marking...
# The loop is created explicitly because asyncio.get_event_loop() is deprecated
# when no loop is current, and fails outright on recent Python versions. The
# finally ensures the loop is closed even if marking raises...
loop = asyncio.new_event_loop()
asyncio.set_event_loop(loop)
try:
  loop.run_until_complete(court())

finally:
  loop.close()

print('Info: Marked {} students'.format(len(results)))



# Generate a spreadsheet...
# One column per question seen in any coursework, in sorted order.
questions = sorted({question for coursework in results for question in coursework.questions()})

# Nice to be sorted...
results.sort(key=lambda c: c.sid)

# errors='replace' stops a character the platform encoding cannot represent
# from aborting the write part way through the files...
with open(args.log, 'w', errors='replace') as flog:
  with open(args.output, 'w', errors='replace') as fout:
    # The csv module quotes fields as needed, so a comma or quote inside a
    # question name or error message can no longer shift the columns...
    writer = csv.writer(fout, lineterminator='\n')

    # Header...
    header = ['Participant']
    for question in questions:
      mm = max_mark.get(question)
      mm = '?' if mm is None else '{:g}'.format(mm)
      header.append('Q. {} ({})'.format(question, mm))
    header += ['Total', 'Checked', 'Feedback', 'Error Level', 'Error Example']
    writer.writerow(header)

    # Each bit of coursework in turn...
    for coursework in results:
      row = [coursework.sid]
      row += [coursework.question_str(question) for question in questions]
      # Checked and Feedback are left blank for the marker to fill in; a
      # missing error example is written as an empty cell...
      row += [coursework.total_str(), '', '', coursework.worst, coursework.worst_msg or '']
      writer.writerow(row)

      flog.write('#' * 80 + '\n')
      flog.write('{}:\n'.format(coursework.sid))
      flog.write('({})\n'.format(coursework.fn))
      flog.writelines('  {}\n'.format(line) for line in coursework.log)

      minutes, seconds = divmod(int(coursework.runtime), 60)
      flog.write('Runtime = {}:{:02d}'.format(minutes, seconds))
      flog.write('\n\n')

print('Info: Done!')
