#! /usr/bin/env python3

import numpy
import automark



# Load code - hand the notebook selected by automark.victim() to the ingester,
# using the lab's own folder as the working directory...
LAB_DIR = '../Machine Learning 1/Labs/01 - Data Exploration'
injest = automark.InjestJupyter(automark.victim(), cwd=LAB_DIR)



# Load the data set ourselves, so it can be passed to the student's functions
# without any risk of the student's code having interfered with it...
rows = []
with open('../Machine Learning 1/Labs/01 - Data Exploration/Skin_NonSkin.txt', 'r') as fin:
  for line in fin:
    parts = [int(v) for v in line.split()]
    if len(parts) != 4:
      continue  # Not a complete four-value row (e.g. a blank line) - ignore it.
    rows.append(parts)
data = numpy.array(rows)

# Columns are read as blue, green, red, label...
in_b, in_g, in_r = data[:, 0], data[:, 1], data[:, 2]

# Relabel 2 as 0, so the labels become 0 = background and 1 = skin. Note that
# out is a view, hence this also rewrites the last column of data...
out = data[:, 3]
out[out == 2] = 0

# Row indices of each class, in file order...
index_bg = numpy.flatnonzero(out == 0)
index_skin = numpy.flatnonzero(out == 1)

# First half of each class trains, second half tests (no shuffling)...
half_bg = len(index_bg) // 2
half_skin = len(index_skin) // 2

index_train = numpy.concatenate([index_bg[:half_bg], index_skin[:half_skin]])
index_test = numpy.concatenate([index_bg[half_bg:], index_skin[half_skin:]])

train_in_r, train_in_g, train_in_b, train_out = (
  column[index_train] for column in (in_r, in_g, in_b, out)
)

test_in_r, test_in_g, test_in_b, test_out = (
  column[index_test] for column in (in_r, in_g, in_b, out)
)



# Question 1 (Basic Statistics)...
# Values the notebook is expected to contain. Entries are either a single
# string or a pair of strings - presumably a pair means either rounding is
# accepted (confirm against automark.ContainsCell)...
expected_stats = [
  ('94.6', '94.7'),
  ('67.6', '67.7'),
  '120.3',
  ('68.5', '68.6'),
  ('122.2', '122.3'),
  ('70.6', '70.7'),
  ('192.0', '192.1'),
  ('47.0', '47.1'),
  ('138.7', '138.8'),
  ('44.3', '44.4'),
  '107.6',
  ('47.1', '47.2'),
]

q1 = automark.Question(1, 1, 2)
q1.add(None, automark.ContainsCell(expected_stats))
q1(injest)



# Question 2 (The Simplest Classifier)...
def test1(injest):
  try:
    is_skin = injest['is_skin1']
  
    estimate = numpy.array([is_skin(test_in_r[i], test_in_g[i], test_in_b[i]) for i in range(test_in_r.shape[0])])

    correct = (estimate==test_out).sum()
    percentage = 100.0 * correct / float(test_in_r.shape[0])
  
    print('Info: is_skin1() percentage = {:.2f}'.format(percentage))
    return percentage>79.5 # You get 79.25 if you always predict background!
  
  except Exception as e:
    print('Warning: Exception {} in Q2 judge'.format(type(e).__name__))
    return None


# Q2 has a single judge, test1, registered without a part name (None). The
# numeric arguments go straight to automark.Question - presumably the question
# number followed by the marks available; confirm against automark...
q2 = automark.Question(2, 1)
q2.add(None, test1)
q2(injest)



# Question 3 (Visualisation for a Better Guess)...

def _scatter_judge(xs, ys):
  # Expected scatter plot of xs against ys on the training split: one point
  # set for background (label 0) followed by one for skin (label 1).
  return automark.MatchScatter((xs[train_out==0], ys[train_out==0]),
                               (xs[train_out==1], ys[train_out==1]))


q3 = automark.Question(3, 4, 8)

# Two parts, each pairing one colour channel with blue. Both axis orders are
# registered per part under mode 'any' - presumably so that either orientation
# of the plot is accepted (confirm against automark)...
for part, channel in (('red-blue', train_in_r), ('green-blue', train_in_g)):
  q3.worth(part, 2, 1)
  q3.mode(part, 'any')
  q3.add(part, _scatter_judge(channel, train_in_b))
  q3.add(part, _scatter_judge(train_in_b, channel))

q3(injest)



# Question 4 (Better Classifier)...
def train_func(injest):
  """Judge for Q4 'func': check that the student's train_func is a sensible linear function.

  We are expecting a variable called train_func that contains an array that is
  some linear function of the training data. First check it is the right
  shape, then solve the least-squares problem
  train_func ~ a*red + b*green + c*blue (no constant term) to recover the
  coefficients.

  Returns:
    None  - cannot judge: the variable is missing, is not a 1D numpy array
            with one entry per training sample, or the fit itself failed.
    False - fewer than two coefficients are meaningfully non-zero, or they
            all share the same sign.
    True  - otherwise.
  """
  # As with the Q2 judge, a problem with the student's work must not abort the
  # whole marking run - report it and return None instead...
  try:
    tf = injest['train_func']
    if not isinstance(tf, numpy.ndarray):
      return None

    if tf.ndim != 1 or tf.shape[0] != train_in_r.shape[0]:
      return None

    # One row per training sample, columns in red, green, blue order.
    design = numpy.concatenate((train_in_r[:,None], train_in_g[:,None], train_in_b[:,None]), axis=1)
    coeffs = numpy.linalg.lstsq(design, tf, rcond=None)[0]

  except Exception as e:
    print('Warning: Exception {} in Q4 func judge'.format(type(e).__name__))
    return None

  # Verify the parameters are not stupid - that at least two are non-zero, and that we see both signs...
  good = [val for val in coeffs if numpy.fabs(val) > 0.1]
  if len(good) < 2:
    return False

  # Everything in good has magnitude above 0.1, so "not negative" means positive.
  has_neg = any(val < 0.0 for val in good)
  has_pos = any(not (val < 0.0) for val in good)

  return has_neg and has_pos


def test2(injest):
  is_skin = injest['is_skin2']
  
  estimate = numpy.array([is_skin(test_in_r[i], test_in_g[i], test_in_b[i]) for i in range(test_in_r.shape[0])])

  correct = (estimate==test_out).sum()
  percentage = 100.0 * correct / float(test_in_r.shape[0])
  
  print('Info: is_skin2() percentage = {:.2f}'.format(percentage))
  return percentage>79.5 # You get 79.25 if you always predict background!


# Q4 is split into two named parts, each with its own judge: 'func' is checked
# by train_func() and 'score2' by test2(). The numeric arguments to Question
# and worth are interpreted by automark - presumably marks per part; confirm
# against automark...
q4 = automark.Question(4, 3)
q4.worth('func', 2, 1)
q4.worth('score2', 1, 1)

q4.add('func', train_func)
q4.add('score2', test2)

q4(injest)



# Question 5 (Machine Learning) - reverting to manual...
# Only the full mark range is printed; the final mark is left to a human.
print('Question: 5', 'Mark: 0--4 / 4', '', sep='\n')



# Question 6 (Another Data Set) - can't auto mark this...
# As for question 5, only the full mark range is printed.
print('Question: 6', 'Mark: 0--6 / 6', '', sep='\n')
