# -*- coding: utf-8 -*-
"""
Created on Mon Feb  8 10:47:00 2016

@author: Remi Eyraud

Tested with Python 2.7 and Python 3.4
"""

# Number of the SPiCe problem to work on
problem_number = '0'

# User id received at registration (must be filled in before submitting)
user_id = ''

# Training sample and public test prefix files of the chosen problem
train_file = problem_number + ".spice.train"
prefix_file = problem_number + ".spice.public.test"

# ---- Learning parameters ----

# Estimated rank of the Hankel matrix
rank = 4

# Keep only some of the possible rows and columns of the matrix
partial = True

# Maximal length of the elements indexing the rows and the columns
lrows = 3
lcolumns = 3

# Variant of the matrix to work with:
# one of "classic", "prefix", "suffix", "factor"
version = "factor"

# Use the sparse (True) or the classic (False) representation of the matrix
sparse = True


# Name of this submission (no space or special character).
# Only the rank, the version and lrows are encoded in it.
name = "".join(["rank_", str(rank), "_sparse_", version,
                "_lrows_lcolumns_", str(lrows)])


def learn(train_file, parameter):
    """ Learn a weighted automaton with the spectral approach

        train_file is the path given to the toolbox Sample loader
        parameter is the rank used by LearnAutomaton
        The matrix options (lrows, lcolumns, version, partial, sparse) are
        read from the module-level settings.

        Return the learned automaton after its "classic" -> "prefix"
        transformation (prefix weights instead of sequence weights)
        """
    # The SPiCe spectral learning toolbox is only needed here
    from sp2learn.learning import Learning
    from sp2learn.sample import Sample

    # Options shared by the sample loader and the learner
    matrix_options = dict(lrows=lrows, lcolumns=lcolumns,
                          version=version, partial=partial)

    # Load the learning sample in the format expected by the toolbox
    sample = Sample(adr=train_file, **matrix_options)

    # Build the learner on that sample and learn an automaton
    # (see the toolbox documentation for other possible parameters)
    learner = Learning(sample_instance=sample)
    automaton = learner.LearnAutomaton(rank=parameter, sparse=sparse,
                                       **matrix_options)

    # Switch from sequence weights to prefix weights
    return automaton.transformation(source="classic", target="prefix")


def next_symbols_ranking(model, prefix, k=5):
    """ Give the sorted list of the k most probable next symbols of the prefix

        model is an automaton computing prefix weights (and not sequence
        weights); it must provide val(word) and the number of symbols nbL.
        prefix is a string of space-separated integers whose first element
        is the length of the prefix.
        k is the number of symbols to return.

        Return a string of symbols, each followed by a space, sorted by
        decreasing weight. Symbol -1 stands for the end of the sequence.
        If no symbol has a strictly positive weight, the symbols
        -1, 0, ..., k-2 are returned instead.
        """
    # Word has to be a list of integers (and not a string)
    # First element is the length of the prefix and thus has to be dropped
    word = [int(token) for token in prefix.split()][1:]

    # Weight of the prefix itself
    end_weight = model.val(word)

    # Weight of the prefix followed by each possible symbol.
    # Each one is evaluated once and reused below.
    extension_weights = [model.val(word + [symbol])
                         for symbol in range(model.nbL)]

    # What remains after removing every extension is the weight of stopping
    for weight in extension_weights:
        end_weight -= weight

    # Symbol -1 corresponds to the end of the sequence
    # A negative weight does not carry any meaning, so it is clipped to 0
    candidates = [(-1, max(end_weight, 0))]
    total = max(end_weight, 0)
    for symbol, weight in enumerate(extension_weights):
        clipped = max(weight, 0)
        candidates.append((symbol, clipped))
        total += clipped

    if total != 0:
        # At least one symbol has a strictly positive weight:
        # sort by decreasing weight (ties keep their insertion order)
        candidates = sorted(candidates, key=lambda pair: -pair[1])
        return trans_string([symbol for symbol, _ in candidates][0:k])

    # All symbols have a non-positive weight in the model
    # Return the k first symbols...
    return trans_string([x for x in range(-1, k-1)])


def trans_string(list):
    """ Transform a list of integers into a string where every element is followed by a space """
    return "".join(str(item) + ' ' for item in list)


def get_first_prefix(test_file):
    """ Get the only prefix in test_file

        Return the first line of the file as read by readline(), so any
        end-of-line character is kept. An empty file gives an empty string.
        """
    # The with block closes the file even if reading fails
    with open(test_file) as handle:
        return handle.readline()


def formatString(string_in):
    """ Strip surrounding white spaces, then put %20 in place of every inner space """
    trimmed = string_in.strip()
    return "%20".join(trimmed.split(" "))


# Learn the model from the training file
print ("Start Learning")
model = learn(train_file, rank)
print ("Learning Ended")

# Get the first test prefix: the only element of the test set
first_prefix = get_first_prefix(prefix_file)

# Get the next symbol ranking on the first prefix
ranking = next_symbols_ranking(model, first_prefix)

print ("Prefix number: 1 Ranking: " + ranking + " Prefix: " + first_prefix)

# Transform the ranking to follow the submission format (%20 between symbols)
ranking = formatString(ranking)

# Transform the first prefix to follow the submission format
first_prefix = formatString(first_prefix)

# Create the url used to submit the ranking
url_base = 'http://spice.lif.univ-mrs.fr/submit.php?user=' + user_id +\
           '&problem=' + problem_number + '&submission=' + name + '&'
url = url_base + 'prefix=' + first_prefix + '&prefix_number=1' + '&ranking=' +\
      ranking

# Pick the url library matching the running interpreter.
# Only a failed import may trigger the fallback: anything else
# (e.g. a keyboard interrupt) must not be silently swallowed.
try:
    # Python 2.7
    import urllib2 as ur
    orl2 = True
except ImportError:
    # Python 3.4
    import urllib.request as ur
    orl2 = False

# Get the website answer for the first prefix with this ranking using this
# submission name
response = ur.urlopen(url)
try:
    content = response.read()
finally:
    # Release the connection once the answer has been read
    response.close()

if not orl2:
    # Needed for Python 3.4: the answer is read as bytes
    content = content.decode('utf-8')

# The first token of the answer tells whether the submission is over
list_element = content.split()
head = str(list_element[0])

prefix_number = 2

# As long as the answer starts with neither '[Error]' nor '[Success]',
# it is used as the next prefix to rank
while(head != '[Error]' and head != '[Success]'):
    # Drop the last character of the answer
    # NOTE(review): presumably a trailing end-of-line character - confirm
    prefix = content[:-1]
    # Get the ranking
    ranking = next_symbols_ranking(model, prefix)

    # Report progress every 200 prefixes
    if prefix_number % 200 == 0:
        print("Prefix number: " + str(prefix_number) + " Ranking: " + ranking + " Prefix: " + prefix)

    # Format the ranking
    ranking = formatString(ranking)

    # Create the prefix with the format needed for submission
    prefix = formatString(prefix)

    # Create the url with your ranking to get the next prefix
    url = url_base + 'prefix=' + prefix + '&prefix_number=' +\
        str(prefix_number) + '&ranking=' + ranking

    # Get the answer of the submission on the current prefix
    response = ur.urlopen(url)
    try:
        content = response.read()
    finally:
        response.close()
    if not orl2:
        # Needed for Python 3.4: the answer is read as bytes
        content = content.decode('utf-8')

    list_element = content.split()
    # Update head in case the submission is finished or an error occurred
    head = str(list_element[0])
    # Move on to the next prefix number
    prefix_number += 1

# Post-treatment
# The score is the last element of content (in case of a public test set)
print(content)

list_element = content.split()
score = (list_element[-1])
print(score)