#include <iostream>
#include <fstream>
#include <vector>
#include <string>
#include <limits>
#include "visualizer.h"
using namespace std;

/* Type: ImageData
 * ---------------------------------------------
 * A type representing image data tagged with a
 * label.
 */
struct ImageData {
  vector<double> image; // Raw picture data
  int label;            // Label from 0 - 9
};

/* Constant: kImageDimension
 * Value: The size of one side of an image.
 */
const size_t kImageDimension = 28;

/* Constant: kImageSize
 * Value: The number of pixels in an image.
 */
const size_t kImageSize = kImageDimension * kImageDimension;

/* Constant: kLearningRate
 * Value: The learning rate (alpha) for perceptron learning.
 */
const double kLearningRate = 1.0e-5;

/* Constant: kDesiredAccuracy
 * Value: What accuracy we want before considering a perceptron "trained."
 */
const double kDesiredAccuracy = 5.0e-3;

/* Loads a data set from the file specified in filename, then
 * returns it.
 */
vector<ImageData> LoadDataSet(string filename) {
  vector<ImageData> result;
  ifstream input(filename.c_str());

  /* Sit in a loop reading data. */
  while (true) {
    ImageData current;

    /* Read the image data. */
    for (size_t i = 0; i < kImageSize; ++i) {
      double currValue;
      input >> currValue;
      current.image.push_back(currValue);
    }

    /* Append +1.0 as required by the perceptron algorithm. */
    current.image.push_back(1.0);

    /* Read in the label. */
    input >> current.label;

    /* If we've run out of data, we're done. */
    if (input.fail())
      break;

    result.push_back(current);
  }

  return result;
}

/* Given two vectors of equal size, computes the dot product of the two
 * vectors.  This is found by summing the products of corresponding elements
 * in the vectors.
 */
double DotProduct(vector<double>& one, vector<double>& two) {
  double result = 0.0;

  for (size_t i = 0; i < one.size(); ++i)
    result += one[i] * two[i];

  return result;
}

/* The step function is 0 for nonpositive values and 1 otherwise.
 * We can compute this by just saying value > 0, since C++ interprets
 * this as 1 if the expression is true and 0 otherwise.
 */
int StepFunction(double value) {
  return value > 0.0;
}

/* Trains a classifier to recognize the specified digit using the indicated
 * data set, then returns it.
 */
vector<double> TrainClassifier(vector<ImageData>& trainingSet, int digit) {
  /* A vector of the weights.  We have one weight for each component of
   * the image, plus one extra for the constant term.
   */
  vector<double> weights(kImageSize + 1);

  /* Keep looping until we arrive at a good classifier. */
  while (true) {
    /* First, check how the classifier is doing by computing the errors
     * associated with each training example.
     */
    vector<int> errors(trainingSet.size());

    for (size_t i = 0; i < trainingSet.size(); ++i) {
      /* See what our perceptron says. */
      int ourGuess = StepFunction(DotProduct(weights, trainingSet[i].image));

      /* See what the real answer */
      int actual = (digit == trainingSet[i].label);

      /* Store difference: real - ourGuess */
      errors[i] = actual - ourGuess;
    }

    /* Count the number of errors, which are terms where the error is
     * not identically zero.
     */
    size_t numWrong = 0;
    for (size_t i = 0; i < errors.size(); ++i)
      numWrong += (errors[i] != 0);

    /* If the error rate is below a specified threshold, we're done. */
    if (numWrong < kDesiredAccuracy * trainingSet.size())
      return weights;

    /* Otherwise, update the weights according to the perceptron learning algorithm. */
    for (size_t i = 0; i < weights.size(); ++i)
      for (size_t example = 0; example < trainingSet.size(); ++example)
        weights[i] += kLearningRate * errors[example] * trainingSet[example].image[i];

  }
}

/* Runs the classifier array on a test set and reports the accuracy. */
void TestClassifiers(vector<ImageData>& testSet, vector< vector<double> >& classifiers) {
  /* Count how many times we were correct. */
  size_t numCorrect = 0;

  /* Run the classifiers on every test image. */
  for (size_t i = 0; i < testSet.size(); ++i) {
    /* Initially, our guess is zero with -infinity as our confidence.  To use
     * numeric_limits, you'll need to #include <limits>
     */
    size_t bestGuess = 0;
    double bestConfidence = -numeric_limits<double>::infinity();

    /* Run each classifier on the image, picking the one with the highest
     * confidence.
     */
    for (size_t classifier = 0; classifier < classifiers.size(); ++classifier) {
      double confidence = DotProduct(classifiers[classifier], testSet[i].image);

      if (confidence > bestConfidence) {
        bestConfidence = confidence;
        bestGuess = classifier;
      }
    }

    /* If we got this answer right, update the number of correct answers.  If
     * you're interested, a cool extension would be to track the "confusion 
     * matrix," a 10x10 grid saying for each actual answer what we thought
     * it was.  This lets us see, for example, if there are two digits that
     * are often confused with one another.
     */
    if (bestGuess == testSet[i].label)
      ++numCorrect;
  }

  /* Print out the accuracy. */
  cout << "Accuracy: " << ((100.0 * numCorrect) / testSet.size()) << "%" << endl;
}

int main() {
  /* Load training data. */
  vector<ImageData> trainingSet = LoadDataSet("data/training-data");

  /* Train a classifier for each digit. */
  vector< vector<double> > classifiers;
  for (int i = 0; i < 10; ++i) {
    cout << "Training a classifier to recognize: " << i << endl;
    classifiers.push_back(TrainClassifier(trainingSet, i));

    /* Save a visualization so that we can see what the computer thinks each
     * digit looks like.
     */
    SaveVisualization(classifiers.back(), i);
  }

  /* Test the classifiers on the testing data. */
  vector<ImageData> testingSet = LoadDataSet("data/testing-data");
  TestClassifiers(testingSet, classifiers);
}


