#include <stdio.h>
#include <stdlib.h>
//#include <GL/glew.h>
#ifdef _WIN32
#include <GL/wglew.h>
#endif 
#if defined(__APPLE__) || defined(__MACOSX)
#include <GLUT/glut.h>
#else
#include <GL/freeglut.h>
#endif


#include <cuda.h> 
#include <cuda_runtime_api.h>
#include <cuda_gl_interop.h>

// CUDA runtime
// CUDA utilities and system includes
#include <cuda_runtime.h>
#include <cuda_gl_interop.h>

//#include <helper_functions.h>
#include <helper_cuda.h>
#include <helper_gl.h>
//#include <rendercheck_gl.h>


#include <math.h>
#include <string.h>
#include <png.h> 
//#include <cstdio>

// Not referenced anywhere in the visible part of this file.
#define MAX_EPSILON 50
// Delay passed to glutTimerFunc() between timerEvent() invocations.
#define REFRESH_DELAY	  5 //ms


//#define MAX(a,b) ((a > b) ? a : b)
// Turns a byte offset into a pointer value; displayCA() passes BUFFER_DATA(0)
// as the pixel-data argument of glTexSubImage2D().
#define BUFFER_DATA(i) ((char *)0 + i)

// ARB fragment program: writes the texel sampled from texture unit 0 (2D) at
// the fragment's texture coordinate to the output colour.
static const char *shader_code = 
"!!ARBfp1.0\n"
"TEX result.color, fragment.texcoord, texture[0], 2D; \n"
"END";

// OpenGL object names for the pixel buffer object, the display texture and
// the fragment program; created in initOpenGLBuffers(), 0 when not allocated.
GLuint gl_PBO, gl_Tex, gl_Shader;
struct cudaGraphicsResource *cuda_pbo_resource; // handles OpenGL-CUDA exchange

// Source image on the host side; allocated in initOpenGLBuffers() and used as
// the initial data for the texture and the PBO.
uchar4 *h_Src = 0;

// Destination image on the GPU side; renderImage() sets it to the mapped PBO
// pointer before calling display_cellauto().
uchar4 *d_dst = NULL;

// Rule table: 8 bytes, accessed one bit at a time by the *_rule_table_entry()
// helpers (index x selects the byte, y selects the bit).
unsigned char h_ruletable[8];
// Defined in another translation unit; main() sets them from the "-s" option.
extern char p_board_width;
extern char p_board_height;

// true: displayFunc() draws the rule-table editor; false: it draws the automaton.
bool ruletable_visible = true;
// Set by initData() to deviceProp.major*10 + deviceProp.minor.
int version = 0;
// Multiprocessor count of device 0, set by initData() and passed to the
// run_cellauto()/display_cellauto() entry points.
int numSMs = 0;
// Current window/image size in pixels; updated by reshapeFunc().
int imageW = 1024, imageH = 1024;
// Not referenced anywhere in the visible part of this file.
unsigned int hTimer;
//unsigned long long num_generations = 0;
// View offset, changed by the arrow keys in keyboardFunc2(). They are not read
// in this file; presumably consumed by the display code elsewhere - TODO confirm.
int h_offsetx = 0;
int h_offsety = 0;
// Zoom level, changed by the '-' and '=' keys; also used by mouseMotionFunc()
// and keyboardFunc2().
int zoom = 0;
// Speed setting used by timerEvent(): >= 0 runs 2^speed generations per timer
// tick, < 0 runs one generation every 2^(-speed) ticks. Range [-30, 30].
int cell_auto_speed = 0;
// Toggled by the Enter key (13); timerEvent() only advances the automaton
// while this is true and the rule table is hidden.
bool run_cell_auto = false;

void initOpenGLBuffers(int w, int h);


// Defined elsewhere; not referenced in the visible part of this file.
extern int color_phase;
extern int h_numStateBits;
// Last mouse button (0..2) that was pressed; recorded by mousedrawFunc().
int button_down = 0;
void renderImage(bool bUseOpenGL);

// Only referenced from commented-out code in the visible part of this file.
int sel_state = 0;
unsigned char button_color[4] = {0,1,0,1};
// Entry points implemented in another translation unit (C linkage).
extern "C" void init_cellauto(int seed);
extern "C" void run_cellauto(int numSMs);
extern "C" void display_cellauto(uchar4 *dst,int imageW, int imageH,int numSMs);
extern "C" void paint_cell(int x,int y,int c);
extern "C" void saveF(char *filename);
extern "C" void saveImg(char *filename);
extern "C" bool openF(char *filename);
extern "C" bool create_board23();
extern "C" int get_red(int i);
extern "C" int get_green(int i);
extern "C" int get_blue(int i);
extern "C" void init_color_palette();
// Toggle bit y of rule-table byte x.
void flip_rule_table_entry(int x,int y)
{
  const int bit_mask = 1 << y;
  h_ruletable[x] ^= bit_mask;
}
// Overwrite bit y of rule-table byte x: the bit is cleared first and then
// OR-ed with (b << y).
void set_rule_table_entry(int x,int y,int b)
{
  const int cleared = h_ruletable[x] & ~(1 << y);
  h_ruletable[x] = (unsigned char) (cleared | (b << y));
}
// Return bit y (0 or 1) of rule-table byte x.
int get_rule_table_entry(int x,int y)
{
  const int shifted = h_ruletable[x] >> y;
  return shifted & 1;
}
// Placeholder: intentionally empty. The only call site visible in this file
// (in main) is commented out.
void init_button_color()
{
}
// Reset every byte of the host rule table to zero.
void init_h_ruletable()
{
  memset(h_ruletable, 0, sizeof(h_ruletable));
}
// Create an ARB assembly program object from the given source text.
//
//   program_type  target to bind/compile for (e.g. GL_FRAGMENT_PROGRAM_ARB)
//   code          NUL-terminated program source
//
// Returns the new program name, which is left bound to program_type, or 0 if
// the driver reported a compile error. On error the message is written to
// stderr and the half-built program object is unbound and deleted so it does
// not leak.
GLuint compileASMShader(GLenum program_type, const char *code) {
    GLuint program_id = 0;
    glGenProgramsARB(1, &program_id);
    glBindProgramARB(program_type, program_id);
    glProgramStringARB(program_type, GL_PROGRAM_FORMAT_ASCII_ARB,
                       (GLsizei) strlen(code), (const GLubyte *) code);

    // Pre-set to "no error" so the value is defined even if the query writes
    // nothing (e.g. the extension is unavailable).
    GLint error_pos = -1;
    glGetIntegerv(GL_PROGRAM_ERROR_POSITION_ARB, &error_pos);
    if (error_pos != -1) {
        const GLubyte *error_string = glGetString(GL_PROGRAM_ERROR_STRING_ARB);
        // glGetString may return NULL; never hand NULL to %s.
        fprintf(stderr, "Program error at position: %d\n%s\n", (int)error_pos,
                error_string ? (const char *) error_string : "(no error string)");
        glBindProgramARB(program_type, 0);
        glDeleteProgramsARB(1, &program_id);
        return 0;
    }
    return program_id;
}

// GLUT reshape callback: resets the viewport and matrices for the new window
// size, rebuilds the texture/PBO through initOpenGLBuffers() and records the
// new size in imageW/imageH.
void reshapeFunc(int w, int h)
{
    glViewport(0, 0, w, h);

    // Identity model-view matrix.
    glMatrixMode(GL_MODELVIEW);
    glLoadIdentity();

    // Orthographic projection over the unit square.
    glMatrixMode(GL_PROJECTION);
    glLoadIdentity();
    glOrtho(0.0, 1.0, 0.0, 1.0, 0.0, 1.0);

    imageW = w;
    imageH = h;
    initOpenGLBuffers(w, h);
}
// Draw the current automaton state into d_dst via display_cellauto().
// When bUseOpenGL is true, the PBO is mapped first so that d_dst points at it,
// and unmapped again afterwards; otherwise d_dst is used as it currently is.
void renderImage(bool bUseOpenGL)
{
  if (bUseOpenGL) {
    size_t mapped_bytes;
    checkCudaErrors(cudaGraphicsMapResources(1, &cuda_pbo_resource, 0));
    checkCudaErrors(cudaGraphicsResourceGetMappedPointer((void**)&d_dst,
                                                         &mapped_bytes,
                                                         cuda_pbo_resource));
  }

  display_cellauto(d_dst, imageW, imageH, numSMs);

  if (bUseOpenGL) {
    checkCudaErrors(cudaGraphicsUnmapResources(1, &cuda_pbo_resource, 0));
  }
}
// Bookkeeping used by timerEvent().
int generations_per_second;           // generations run since the last "gps" printout
//int time1;
int time23;                           // time(NULL) value stored at the last "gps" printout
int generation_frames_remaining = 1;  // timer ticks left until the next slow-mode generation

// GLUT timer callback; re-arms itself every REFRESH_DELAY ms.
// While the automaton is running and the rule table is hidden:
//   cell_auto_speed >= 0 : run 2^speed generations per tick and print the
//                          generation count whenever time(NULL) changes;
//   cell_auto_speed <  0 : run one generation every 2^(-speed) ticks.
// The parameter `value` is not used.
void timerEvent(int value)
{
  const bool simulating = run_cell_auto && !ruletable_visible;

  if (simulating) {
    if (cell_auto_speed < 0) {
      generation_frames_remaining -= 1;
      if (generation_frames_remaining == 0) {
        generation_frames_remaining = 1 << -cell_auto_speed;
        run_cellauto(numSMs);
      }
    } else {
      const int steps_this_tick = 1 << cell_auto_speed;
      for (int step = 0; step < steps_this_tick; ++step) {
        run_cellauto(numSMs);
        ++generations_per_second;
        if (time(NULL) != time23) {
          printf("gps %d \n", generations_per_second);
          generations_per_second = 0;
          time23 = time(NULL);
        }
      }
    }
    glutPostRedisplay();
  }

  glutTimerFunc(REFRESH_DELAY, timerEvent, 1);
}
// Set the current GL colour from palette entry (i & 3); each channel returned
// by get_red/get_green/get_blue is divided by 255.0.
void use_color_from_palette(int i)
{
  const int slot = i & 3;
  const double red   = get_red(slot)   / 255.0;
  const double green = get_green(slot) / 255.0;
  const double blue  = get_blue(slot)  / 255.0;
  glColor3f(red, green, blue);
}
// Draw the rule-table editor: four full-width background stripes (one per
// row, coloured by row index) and, on top, a 7 x 4 grid of slightly inset
// cells whose colour index is (table bit) | ((row & 1) << 1).
void displayRT()
{
  const int cols = 7;
  const int rows = 4;

  glDisable(GL_DEPTH_TEST);
  glDisable(GL_TEXTURE_2D);
  glBegin(GL_QUADS);

  // Background stripes.
  for (int row = 0; row < rows; ++row) {
    const double bottom = (row + 0.0) / rows;
    const double top    = (row + 1.0) / rows;
    use_color_from_palette(row);
    glVertex2f(0.0, bottom);
    glVertex2f(0.0, top);
    glVertex2f(1.0, top);
    glVertex2f(1.0, bottom);
  }

  // One inset quad per rule-table bit.
  for (int row = 0; row < rows; ++row) {
    const double bottom = (row + 0.1) / rows;
    const double top    = (row + 0.9) / rows;
    for (int col = 0; col < cols; ++col) {
      const double left  = (col + 0.1) / cols;
      const double right = (col + 0.9) / cols;
      const int color_index = get_rule_table_entry(col, row) | ((row & 1) << 1);
      use_color_from_palette(color_index);
      glVertex2f(left,  bottom);
      glVertex2f(left,  top);
      glVertex2f(right, top);
      glVertex2f(right, bottom);
    }
  }

  glEnd();
  glutSwapBuffers();
}
void displayCA() {
    //cutilCheckError(cutResetTimer(hTimer));  
    glEnable(GL_TEXTURE_2D);
    renderImage(true);
    glBindTexture(GL_TEXTURE_2D, gl_Tex);
    glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, imageW, imageH, GL_RGBA, GL_UNSIGNED_BYTE, BUFFER_DATA(0));
    glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, gl_Shader);
    glEnable(GL_FRAGMENT_PROGRAM_ARB);
    glDisable(GL_DEPTH_TEST);

    glBegin(GL_QUADS);
    glTexCoord2f(0.0f, 0.0f); glVertex2f(0.0f, 0.0f);
    glTexCoord2f(1.0f, 0.0f); glVertex2f(1.0f, 0.0f);
    glTexCoord2f(1.0f, 1.0f); glVertex2f(1.0f, 1.0f);
    glTexCoord2f(0.0f, 1.0f); glVertex2f(0.0f, 1.0f);
    glEnd();

    glBindTexture(GL_TEXTURE_2D, 0);
    glDisable(GL_FRAGMENT_PROGRAM_ARB);
    glutSwapBuffers();
}

// GLUT display callback: draws the rule-table editor while it is visible,
// otherwise the cellular automaton.
void displayFunc(void)
{
  if (!ruletable_visible) {
    displayCA();
    return;
  }
  displayRT();
}
void initOpenGLBuffers(int w, int h)
{
    // delete old buffers
    if (h_Src) {
        free(h_Src);
        h_Src = 0;
    }

    if (gl_Tex) {
        glDeleteTextures(1, &gl_Tex);
        gl_Tex = 0;
    }
    if (gl_PBO) {
		//DEPRECATED: cutilSafeCall(cudaGLUnregisterBufferObject(gl_PBO));    
		cudaGraphicsUnregisterResource(cuda_pbo_resource);
        glDeleteBuffers(1, &gl_PBO);
        gl_PBO = 0;
    }

    // check for minimized window
    if ((w==0) && (h==0)) {
        return;
    }

    // allocate new buffers
	h_Src = (uchar4*)malloc(w * h * 4);

    printf("Creating GL texture...\n");
        glEnable(GL_TEXTURE_2D);
        glGenTextures(1, &gl_Tex);
        glBindTexture(GL_TEXTURE_2D, gl_Tex);
        glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP);
        glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP);
        glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
        glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
        glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA8, w, h, 0, GL_RGBA, GL_UNSIGNED_BYTE, h_Src);
    printf("Texture created.\n");

    printf("Creating PBO...\n");
        glGenBuffers(1, &gl_PBO);
        glBindBuffer(GL_PIXEL_UNPACK_BUFFER_ARB, gl_PBO);
        glBufferData(GL_PIXEL_UNPACK_BUFFER_ARB, w * h * 4, h_Src, GL_STREAM_COPY);
        //While a PBO is registered to CUDA, it can't be used 
        //as the destination for OpenGL drawing calls.
        //But in our particular case OpenGL is only used 
        //to display the content of the PBO, specified by CUDA kernels,
        //so we need to register/unregister it only once.
        
	// DEPRECATED: cutilSafeCall( cudaGLRegisterBufferObject(gl_PBO) );
    checkCudaErrors(cudaGraphicsGLRegisterBuffer(&cuda_pbo_resource, gl_PBO, cudaGraphicsMapFlagsWriteDiscard));
    printf("PBO created.\n");

    // load shader program
    gl_Shader = compileASMShader(GL_FRAGMENT_PROGRAM_ARB, shader_code);
}
void cleanup() {
    if (h_Src) {
        free(h_Src);
        h_Src = 0;
    }
	cudaGraphicsUnregisterResource(cuda_pbo_resource);
        glBindBuffer(GL_PIXEL_UNPACK_BUFFER_ARB, 0);

	    glDeleteBuffers(1, &gl_PBO);
        glDeleteTextures(1, &gl_Tex);
        glDeleteProgramsARB(1, &gl_Shader);
}

// Initialise GLUT and open a double-buffered RGBA window of imageW x imageH
// pixels at the top-left screen position, titled with argv[0].
//
// NOTE: GLEW initialisation and the check for GL_VERSION_1_5 /
// GL_ARB_vertex_buffer_object / GL_ARB_pixel_buffer_object that used to
// follow the window creation are disabled in this file.
void initGL(int *argc, char **argv)
{
    printf("Initializing GLUT...\n");

    glutInit(argc, argv);
    glutInitDisplayMode(GLUT_RGBA | GLUT_DOUBLE);
    glutInitWindowSize(imageW, imageH);
    glutInitWindowPosition(0, 0);
    glutCreateWindow(argv[0]);

    printf("OpenGL window created.\n");
}

void initData() {
    int dev = 0;
    cudaDeviceProp deviceProp;
    cudaGetDeviceProperties(&deviceProp, dev);
    version = deviceProp.major*10 + deviceProp.minor;
    printf("version: %d \n", version);
    numSMs = deviceProp.multiProcessorCount;
}
// GLUT motion callback (mouse moved with a button held): paints colour 1 at
// the cell under the cursor and requests a redraw.
//
// The cursor position is made relative to the window centre, with y flipped
// so that it grows upwards. When zoom > 0 the centre offset has its low
// `zoom` bits cleared before it is subtracted.
void mouseMotionFunc(int x,int y) {
  //int c = button_color[b];//[(sel_state * 4) + b];
  int c = 1;//sel_state & 3;
  int mx = imageW>>1;
  int my = imageH>>1;
  if (zoom > 0) {
    // Build the mask in unsigned arithmetic: left-shifting -1 is not
    // well-defined before C++20, and a shift count >= 32 never is.
    const unsigned shift = (zoom < 31) ? (unsigned) zoom : 31u;
    const int mask = (int) ~((1u << shift) - 1u);
    mx = mx & mask;
    my = my & mask;
  }
  paint_cell(x-mx,(imageH-y)-my,c);
  glutPostRedisplay();
}
// Handle a mouse click on the rule-table editor: converts window pixel
// coordinates (origin top-left) to a cell of the 7 x 4 grid drawn by
// displayRT() (origin bottom-left) and toggles that rule-table bit.
//
//   button  mouse button that was pressed (currently unused)
//   x, y    click position in window pixels
//
// Clicks are ignored when the window has no area or when the position falls
// outside the grid, so h_ruletable is never indexed out of range.
void button_click(int button, int x,int y) {
  const int w = 7;
  const int h = 4;
  if (imageW <= 0 || imageH <= 0) {
    return;  // would divide by zero below
  }
  double fx = x;
  double fy = y;
  fx = fx / ((double) imageW);
  fy = fy / ((double) imageH);
  fy = 1.0-fy;
  fx = fx * w;fy = fy * h;
  if (fx < 0.0 || fx >= (double) w || fy < 0.0 || fy > (double) h) {
    return;  // outside the grid
  }
  int x2 = (int)fx;int y2 = (int)fy;
  // y == 0 (top pixel row) gives fy == h exactly; it belongs to the top row.
  if (y2 == h) {
    y2 = h - 1;
  }
  printf("x2 %d y2 %d \n",x2,y2);
  flip_rule_table_entry(x2,y2);
}
// GLUT mouse-button callback. On a press of button 0..2 the button is stored
// in button_down and, while the rule table is shown, the click is forwarded
// to button_click(). A redraw is requested for every event.
void mousedrawFunc(int button, int state,int x,int y)
{
  const bool pressed = (state == GLUT_DOWN);
  const bool tracked_button = (button <= 2);

  if (pressed && tracked_button) {
    button_down = button;
    if (ruletable_visible) {
      button_click(button, x, y);
    }
  }
  glutPostRedisplay();
}
// GLUT keyboard callback for ASCII keys. x and y are unused.
//
//   space  toggle rule-table editor / automaton view
//   Enter  start / stop the automaton
//   - / =  decrease / increase zoom
//   > / <  increase / decrease cell_auto_speed (kept within [-30, 30])
//   `      run a single generation
//   s      save the state to "ca<time>" and an image to "ca<time>.png"
//   N      re-initialise the automaton, seeded with the current time
//
// A redraw is requested after every key.
void keyboardFunc(unsigned char key, int x, int y) {
    switch (key) {
    case ' ':
      ruletable_visible = !ruletable_visible;
      break;
    case 13:
      run_cell_auto = !run_cell_auto;
      break;
    case '-':
      zoom = zoom - 1;
      break;
    case '=':
      zoom = zoom + 1;
      break;
    case '>':
      if (cell_auto_speed < 30) {
        cell_auto_speed = cell_auto_speed + 1;
      }
      break;
    case '<':
      if (cell_auto_speed > -30) {
        cell_auto_speed = cell_auto_speed - 1;
      }
      break;
    case '`':
      run_cellauto(numSMs);
      //num_generations = num_generations + 1;
      break;
    case 's': {
      unsigned int t = (unsigned int) time(NULL);
      char filename[100];
      // %u matches the unsigned argument (the former %d would print a
      // negative number once the timestamp exceeds INT_MAX); the bounded
      // calls keep the buffer safe regardless of the value.
      snprintf(filename, sizeof(filename), "ca%u", t);
      saveF(filename);
      printf("filename: %s \n",filename);
      strncat(filename, ".png", sizeof(filename) - strlen(filename) - 1);
      saveImg(filename);
      break;
    }
    case 'N':
      init_cellauto(time(0));
      //num_generations = 0;
      break;
    default:
      break;
    }
    glutPostRedisplay();
}
void keyboardFunc2(int key, int x, int y)
{
  int a = 6-zoom;
  if (a < 0) {a = 0;}
  if (key == 100) { //left
    h_offsetx = h_offsetx - (1 << a);
  }
  if (key == 101) { //up
    h_offsety = h_offsety + (1 << a);
  }
  if (key == 102) { //right
    h_offsetx = h_offsetx + (1 << a);
  }
  if (key == 103) { //down
    h_offsety = h_offsety - (1 << a);
  }
    glutPostRedisplay();  
}
// Program entry point.
//
// Command line:
//   -s <n>     board size: p_board_width = (n+1)>>1, p_board_height = n>>1
//   -f <file>  load the initial state from <file> with openF()
//
// Without -f a new board is created and initialised with seed 0. Then the
// GLUT callbacks are registered and the main loop is entered.
// Returns -1 if the file cannot be opened or the board cannot be allocated.
int main(int argc, char **argv)
{
  initGL(&argc, argv);
  initData();

  char *filename = NULL;
  // Start at 1: argv[0] is the program name, not an option.
  for (int i = 1; i < argc; i++) {
    if (strcmp(argv[i],"-s") == 0) {
      if (i + 1 >= argc) {break;}   // option without a value
      i = i + 1;
      int s = atoi(argv[i]);
      p_board_width = (char) ((s+1) >> 1);
      p_board_height = (char) (s >> 1);
    } else if (strcmp(argv[i],"-f") == 0) {
      // "else if": the value consumed by -s must not be re-examined as a flag.
      if (i + 1 >= argc) {break;}   // option without a value
      i = i + 1;
      filename = argv[i];
    }
  }

  if (filename != NULL) {
      if (openF(filename) == false) {
        printf("error: can not open file %s \n",filename);
        // Non-zero status so callers can detect the failure (same value as
        // the out-of-memory path below).
        return -1;
      }
  } else {
    if (create_board23() == false) {
      printf("error: out of memory\n");
      return -1;
    }
    init_h_ruletable();
    init_cellauto(0);
  }
  //init_button_color();
  //init_color_palette();

  glutDisplayFunc(displayFunc);
  glutReshapeFunc(reshapeFunc);
  glutTimerFunc(REFRESH_DELAY, timerEvent, 0);
  glutKeyboardFunc(keyboardFunc);
  glutSpecialFunc(keyboardFunc2);
  glutMouseFunc(mousedrawFunc);
  glutMotionFunc(mouseMotionFunc);

  atexit(cleanup);
  glutMainLoop();
  exit(EXIT_SUCCESS);
}

