#include <cstdio>
#include <iostream>
#include <sstream>
#include <stdexcept>
#include <string>

#define USE_AUDIO
#include "NtKinect.h"

#include "NtGoogleSpeech.h"

using namespace std;

#include <time.h>
// Returns the current local time formatted as "YYYY-MM-DD_hh-mm-ss"
// (for example "2024-03-09_14-05-59"). For ordinary dates the result contains
// only digits, '-' and '_'.
//
// Throws std::runtime_error if the clock cannot be read or the value cannot be
// converted to local time; previously such a failure went unnoticed and an
// indeterminate struct tm was formatted.
std::string now() {
  const time_t t = time(nullptr);
  if (t == static_cast<time_t>(-1)) {
    throw std::runtime_error("now(): current time is unavailable");
  }

  struct tm lnow{};
#if defined(_WIN32)
  // Microsoft CRT variant: arguments are (tm*, const time_t*), returns 0 on success.
  const bool converted = (localtime_s(&lnow, &t) == 0);
#else
  // POSIX variant: returns a null pointer on failure.
  const bool converted = (localtime_r(&t, &lnow) != nullptr);
#endif
  if (!converted) {
    throw std::runtime_error("now(): cannot convert current time to local time");
  }

  // Six ints of at most 11 characters each plus five separators is 71
  // characters, so 80 bytes can never be overrun; snprintf bounds the write anyway.
  char s[80];
  std::snprintf(s, sizeof s, "%04d-%02d-%02d_%02d-%02d-%02d", lnow.tm_year + 1900, lnow.tm_mon + 1, lnow.tm_mday,
                lnow.tm_hour, lnow.tm_min, lnow.tm_sec);
  return std::string(s);
}

void doJob() {
  NtKinect kinect;
  bool flag = false;
  string filename = "";
  NtGoogleSpeech gs("C:\\Users\\nitta\\Documents\\GoogleSpeech\\token-file.txt");

  std::wcout.imbue(std::locale("")); // for wcout
  while (1) {
    kinect.setRGB();
    if (flag) kinect.setAudio();
    cv::putText(kinect.rgbImage, flag ? "Recording" : "Stopped", cv::Point(50, 50),
		cv::FONT_HERSHEY_SIMPLEX, 1.2, cv::Scalar(0, 0, 255), 1, CV_AA);
    cv::imshow("rgb", kinect.rgbImage);
    auto key = cv::waitKey(1);
    if (key == 'q') break;
    else if (key == 'r') flag = true;
    else if (key == 's') flag = false;
    else if (key == 'u' || key == 'j') {
      if (filename != "") {
	string res = gs.doSyncRequest(filename,(key == 'u')? "en-US" : "ja-JP");
	vector<wchar_t> u16;
	if (gs.utf8ToUtf16(&res[0],(int)res.length(),u16)) {
	  wstring w16(u16.begin(),u16.end());
	  std::wcout << w16 << endl;
	} else {
	  cout << res << endl;
	}
	
	string outname(filename.begin(), filename.end()-4);
	ofstream fout(outname+".txt");
	fout << res;
      }
    }

    if (flag && !kinect.isOpenedAudio()) {
      filename = now() + ".wav";
      kinect.openAudio(filename);
    } else if (!flag && kinect.isOpenedAudio()) kinect.closeAudio();
  }
  cv::destroyAllWindows();
}

// Program entry point: runs doJob() and reports any std::exception that
// escapes from it.
// Returns 0 when doJob() finishes normally and 1 when it ended with an
// exception, so the failure is visible in the process exit status.
int main(int argc, char** argv) {
  try {
    doJob();
  }
  catch (const std::exception &ex) {
    std::cout << ex.what() << std::endl;
    // Block until something is typed; presumably this keeps the console window
    // open long enough to read the message.
    std::string s;
    std::cin >> s;
    return 1;
  }
  return 0;
}
