#! /usr/bin/env python3

import numpy

import automark2 as am



# Bit of safety - limit memory usage...
# NOTE(review): the unit of the 32 is not visible from this file (presumably
# gigabytes) - confirm against automark2.limit_memory.
am.limit_memory(32)



# Load code and print everything it outputs...
# The cwd given here is a relative path, so what it resolves to depends on the
# directory this script is launched from.
# NOTE(review): stop=-1 presumably means "do not stop early" - confirm against
# automark2.Notebook.print_cells.
notebook = am.Notebook(cwd='../Machine Learning 2/Labs/03 - NLP')
notebook.print_cells(stop=-1)



# Q1 - Word POS...
def _manual_question(title, marks):
  """Create a question worth `marks`, attach an am.MrBean() test and run it.

  The steps happen in the same order as when written out by hand: construct,
  set worth, add the test, then run against the module-level `notebook`.
  Returns the question object so it can still be bound to a module-level name.
  """
  question = am.Question(title, marks)
  question.worth(None, marks)
  question.add(None, am.MrBean())
  question(notebook)
  return question


q1_ks = _manual_question('1. Kitchen Sink', 6)
q1_ls = _manual_question('1. Line Search', 3)
q1_ne = _manual_question('1. Nesterov', 3)

# The training part is checked from the notebook's printed accuracy line and
# from a code match on the token_pos() call.
q1_tr = am.Question('1. Train', 4)
q1_tr.worth(None, 4)
q1_tr.add(
  None,
  am.PrintGreaterThan(r'Percentage correct = (?P<num>\d+(\.\d*)?)%', 87, 50),
  am.CodeMatch(['token_pos()']),
)
q1_tr(notebook)



# Q2 - Sentence POS...
q2_adj = _manual_question('2. Adjacency', 2)
q2_hmm1 = _manual_question('2. HMM code', 3)

# Same style of check as the Q1 training part, but against sentence_pos() and
# with a different threshold.
q2_hmm2 = am.Question('2. HMM test', 3)
q2_hmm2.worth(None, 3)
q2_hmm2.add(
  None,
  am.PrintGreaterThan(r'Percentage correct = (?P<num>\d+(\.\d*)?)%', 89, 50),
  am.CodeMatch(['sentence_pos()']),
)
q2_hmm2(notebook)



# Q3 - Named entity recognition...
q3_ner = am.Question('3. NER', 2)
q3_ner.worth(None, 2)
q3_ner.mode(None, 'all')

# Each check names the notebook function 'sentence_ner' and supplies a dummy
# sentence (one word repeated) together with a list of POS tags; the leading
# list of booleans has one flag per word.
# NOTE(review): presumably that boolean list is the expected return value and
# the remaining arguments are forwarded to sentence_ner - confirm against
# automark2.FuncClose.
#
# The tag list needs exactly one entry per word.  Every tag must be followed by
# a comma: two adjacent string literals without one are silently concatenated
# by Python ('D' 'E' becomes the single tag 'DE'), leaving the tag list one
# entry shorter than the sentence it describes.
q3_ner.add(None, am.FuncClose([False, False, True], 'sentence_ner', ['Malkovich'] * 3, ['C', 'I', 'N']), 'end')
q3_ner.add(None, am.FuncClose([False, True, True, False], 'sentence_ner', ['Malkovich'] * 4, ['D', 'D', 'N', 'Z']), 'end')
q3_ner.add(None, am.FuncClose([False, True, True, True, False, False, False], 'sentence_ner', ['Malkovich'] * 7, ['J', 'D', 'J', 'N', 'V', 'D', 'E']), 'end')
q3_ner.add(None, am.FuncClose([False, True, False, True, False, True, True, False], 'sentence_ner', ['Malkovich'] * 8, ['V', 'N', 'V', 'N', 'V', 'J', 'N', '.']), 'end')

q3_ner(notebook)



# Q4 - Relation extraction...
# One entry per test sentence, keyed by a short label.  Each entry holds, in
# order: the tokens, one POS tag per token, one named-entity flag per token,
# and the list of (subject, relation, object) triples expected for it.
_q4_cases = {
  'a': (
    ['London', 'is', 'full', 'of', 'pigeons', '.'],
    ['N', 'V', 'N', 'I', 'N', '.'],
    [True, False, True, False, True, False],
    [('London', 'is full of', 'pigeons')],
  ),
  'b': (
    ['In', '1781', 'William', 'Herschel', 'discovered', 'Uranus'],
    ['I', 'N', 'N', 'N', 'V', 'N'],
    [False, True, True, True, False, True],
    [('1781 William Herschel', 'discovered', 'Uranus')],
  ),
  'c': (
    ['Trolls', 'really', 'do', 'not', 'like', 'the', 'sun', '.'],
    ['N', 'R', 'V', 'R', 'I', 'D', 'N', '.'],
    [True, False, False, False, False, True, True, False],
    [('Trolls', 'do not like', 'the sun')],
  ),
  'd': (
    ['Giant', 'owls', 'would', 'enjoy', 'eatting', 'people', '.'],
    ['J', 'N', 'A', 'V', 'N', 'N', '.'],
    [True, True, False, False, True, True, False],
    [('Giant owls', 'enjoy', 'eatting people')],
  ),
  'e': (
    ['Dragons', 'collect', 'gold', ',', 'but', 'they', 'do', 'not', 'make', 'microprocessors', '.'],
    ['N', 'V', 'N', '.', 'C', 'M', 'V', 'R', 'V', 'N', '.'],
    [True, False, True, False, False, False, False, False, False, True, False],
    [('Dragons', 'collect', 'gold'), ('gold', 'do not make', 'microprocessors')],
  ),
}

# Split the table into the four parallel lookups; all share the same keys in
# the same order.
sentences = {label: case[0] for label, case in _q4_cases.items()}
parts = {label: case[1] for label, case in _q4_cases.items()}
names = {label: case[2] for label, case in _q4_cases.items()}
rels = {label: case[3] for label, case in _q4_cases.items()}


def replacement_pos(sentence):
  """Return the stored POS tags for a known test sentence.

  Looks `sentence` up among the values of the module-level `sentences` table
  and returns the `parts` entry filed under the same key.  If no stored
  sentence equals it, an error line is printed and KeyError is raised.
  """
  label = next((k for k, words in sentences.items() if words == sentence), None)
  if label is None:
    print('ERROR: Sentence changed (pos)')
    raise KeyError
  return parts[label]


def replacement_ner(sentence, pos):
  """Return the stored named-entity flags for a known test sentence.

  Looks `sentence` up among the values of the module-level `sentences` table
  and returns the `names` entry filed under the same key.  `pos` is accepted
  but not used.  If no stored sentence equals it, an error line is printed
  and KeyError is raised.
  """
  label = next((k for k, words in sentences.items() if words == sentence), None)
  if label is None:
    print('ERROR: Sentence changed (ner)')
    raise KeyError
  return names[label]


def rel_equal(lhs, rhs):
  """Compare two collections of relations, ignoring their order.

  Both arguments are sorted into lists, the two sorted lists are printed
  (labelled LHS and RHS), and the result of comparing them is returned.
  """
  left, right = sorted(lhs), sorted(rhs)

  for label, ordered in (('LHS:', left), ('RHS:', right)):
    print(label, ordered)

  return left == right


# Stand-ins for the notebook's own sentence_pos / sentence_ner, keyed by the
# name they replace.
# NOTE(review): presumably am.WrapPatch swaps these in while the wrapped test
# runs - confirm against automark2.
patch = dict(sentence_pos=replacement_pos, sentence_ner=replacement_ner)

q4_rel = am.Question('4. Relations', 4)
q4_rel.worth(None, 4)
q4_rel.mode(None, 'all')

# One check per test sentence: 'extract' is given the sentence and its result
# is compared to the expected relations with rel_equal.
for key in sentences:
  q4_rel.add(
    None,
    am.WrapPatch(
      patch,
      am.FuncEqual(rel_equal, rels[key], 'extract', sentences[key]),
    ),
    'end',
  )

q4_rel(notebook)
