#include <stdio.h>
#include <stdlib.h>
//#include <GL/glew.h>
#ifdef _WIN32
#include <GL/wglew.h>
#endif 
#if defined(__APPLE__) || defined(__MACOSX)
#include <GLUT/glut.h>
#else
#include <GL/freeglut.h>
#endif


#include <cuda.h> 
#include <cuda_runtime_api.h>
#include <cuda_gl_interop.h>

// CUDA runtime
// CUDA utilities and system includes
#include <cuda_runtime.h>
#include <cuda_gl_interop.h>

//#include <helper_functions.h>
#include <helper_cuda.h>
#include <helper_gl.h>
//#include <rendercheck_gl.h>


#include <math.h>
#include <string.h>
#include <png.h> 
//#include <cstdio>

// Not referenced anywhere in the visible part of this file.
#define MAX_EPSILON 50
// Delay in milliseconds passed to glutTimerFunc whenever timerEvent is (re)armed.
#define REFRESH_DELAY	  5 //ms


//#define MAX(a,b) ((a > b) ? a : b)
// Expresses a byte offset as a char pointer; displayCA passes BUFFER_DATA(0)
// as the pixel-data argument of glTexSubImage2D.
#define BUFFER_DATA(i) ((char *)0 + i)

// ARB fragment program source handed to compileASMShader: a single TEX
// instruction that writes the 2D sample of texture[0] at the fragment's
// texture coordinate to result.color.
static const char *shader_code = 
"!!ARBfp1.0\n"
"TEX result.color, fragment.texcoord, texture[0], 2D; \n"
"END";

// OpenGL object names for the pixel buffer object, the texture and the
// fragment program. All three are (re)created in initOpenGLBuffers and
// deleted in cleanup.
GLuint gl_PBO, gl_Tex, gl_Shader;
struct cudaGraphicsResource *cuda_pbo_resource; // handles OpenGL-CUDA exchange

// Host-side buffer allocated in initOpenGLBuffers and used as the initial
// contents of the texture and of the PBO.
uchar4 *h_Src = 0;

// Pointer obtained from the mapped PBO in renderImage and handed to
// display_cellauto as the output image.
uchar4 *d_dst = NULL;

// Rule table: bit i is the next state of a cell whose 5-cell neighborhood
// index is i (see get_neighborhood_index / calc_next_gen).
unsigned int h_ruletable;

// Defined in another translation unit. None of these five is used by the
// active code visible in this file (pal_* appear only in commented-out code).
extern char p_board_width;
extern char p_board_height;
extern unsigned int pal_blue;
extern unsigned int pal_green;
extern unsigned int pal_red;

// Not referenced by the visible code.
int button_down = 0;
// Compute capability as major*10 + minor; filled in by initData.
int version = 0;
// Multiprocessor count of device 0; filled in by initData and forwarded to
// run_cellauto / display_cellauto.
int numSMs = 0;
// Current image size in pixels; updated by reshapeFunc.
int imageW = 1024, imageH = 1024;
// Only referenced from commented-out timer code.
unsigned int hTimer;
// Horizontal offset changed by the arrow keys in keyboardFunc2; it is not read
// in this file (presumably consumed by the renderer elsewhere - TODO confirm).
int h_offsetx = 0;
// Not referenced by the visible code.
int h_offsety = 0;
int zoom = 0;
//int is_fav[1 << 11];
// Toggled by key code 13 in keyboardFunc; when true displayCA also draws the
// two preview rows produced by displayRT.
bool ruletable_visible = false; 
// Only referenced from commented-out code in this file.
bool run_cell_auto = true;
//int run_cell_auto_steps = 1;
// Adjusted with '[' and ']'. When >= 0, timerEvent runs 1 << speed generations
// per tick; when negative, it runs one generation every 1 << -speed ticks.
int cell_auto_speed = 0;
// Preview board: bit x is the state of cell x (toggled in mousedrawFunc,
// drawn by displayRT, advanced with calc_next_gen).
unsigned long long board64 = 0x24375323637A4342l;
// Number of cells of board64 that take part in the preview and wrap-around.
int board64_width = 64;
// Forward declarations of functions defined further down in this file.
void initOpenGLBuffers(int w, int h);
void renderImage(bool bUseOpenGL);
void display_hex_code();

// Bit mask defined elsewhere; toggled by the keys in color_keyboard and read
// by use_pal_color to choose the preview colors.
extern int selch;

//int color_mask = 1;
// Functions with C linkage implemented outside this file.
extern "C" void random_colors();
extern "C" void init_cellauto(int seed);
extern "C" void run_cellauto(int numSMs);
extern "C" void display_cellauto(uchar4 *dst,int imageW, int imageH,int numSMs);
//extern "C" void paint_cell(int x,int y,int c);
//extern "C" void saveF(char *filename);
//extern "C" void saveImg(char *filename);
//extern "C" bool openF(char *filename);
extern "C" bool create_board23();
extern "C" int get_red(int i);
extern "C" int get_green(int i);
extern "C" int get_blue(int i);
extern "C" void set_color(int i,int red,int green,int blue);
extern "C" void init_color_palette();

/**
 * Compiles an ARB assembly program and returns its OpenGL program name.
 *
 * A new program object is generated, bound to `program_type` and loaded with
 * the NUL-terminated ASCII source in `code`.
 *
 * @param program_type  Program target, e.g. GL_FRAGMENT_PROGRAM_ARB.
 * @param code          NUL-terminated program source.
 * @return The program name on success. On a compile error the error position
 *         and message are printed to stderr, the program object is deleted
 *         and 0 is returned.
 */
GLuint compileASMShader(GLenum program_type, const char *code) {
    GLuint program_id;
    glGenProgramsARB(1, &program_id);
    glBindProgramARB(program_type, program_id);
    glProgramStringARB(program_type, GL_PROGRAM_FORMAT_ASCII_ARB, (GLsizei) strlen(code), (GLubyte *) code);

    // GL reports -1 as the error position when the program was accepted.
    GLint error_pos;
    glGetIntegerv(GL_PROGRAM_ERROR_POSITION_ARB, &error_pos);
    if (error_pos != -1) {
        const GLubyte *error_string = glGetString(GL_PROGRAM_ERROR_STRING_ARB);
        fprintf(stderr, "Program error at position: %d\n%s\n", (int)error_pos,
                error_string ? (const char *)error_string : "(no error string available)");
        // The caller only receives 0 and therefore cannot release the object
        // itself; delete it here so a failed compile does not leak a program.
        glDeleteProgramsARB(1, &program_id);
        return 0;
    }
    return program_id;
}
// GLUT reshape callback.
// Resets the viewport and matrices for the new window size, rebuilds the
// texture/PBO pair through initOpenGLBuffers and records the new size in
// imageW/imageH.
//   w, h - new window size in pixels.
void reshapeFunc(int w, int h) {
    glViewport(0, 0, w, h);

    glMatrixMode(GL_MODELVIEW);
    glLoadIdentity();

    // Orthographic projection over the unit square: all drawing code in this
    // file uses coordinates in [0,1] x [0,1].
    glMatrixMode(GL_PROJECTION);
    glLoadIdentity();
    glOrtho(0.0, 1.0, 0.0, 1.0, 0.0, 1.0);

    // Buffers are sized from w/h directly; the globals are updated afterwards.
    initOpenGLBuffers(w, h);
    imageW = w;
    imageH = h;
}
// Renders the current automaton state into d_dst via display_cellauto.
// When bUseOpenGL is true the PBO registered as cuda_pbo_resource is mapped
// first (refreshing d_dst with the mapped pointer) and unmapped afterwards;
// otherwise d_dst is used as it currently is.
void renderImage(bool bUseOpenGL) {
  if (!bUseOpenGL) {
    display_cellauto(d_dst, imageW, imageH, numSMs);
    return;
  }

  // Map the PBO and fetch the pointer for this frame.
  size_t mapped_bytes = 0;
  checkCudaErrors(cudaGraphicsMapResources(1, &cuda_pbo_resource, 0));
  checkCudaErrors(cudaGraphicsResourceGetMappedPointer((void **)&d_dst, &mapped_bytes, cuda_pbo_resource));

  display_cellauto(d_dst, imageW, imageH, numSMs);

  // Release the mapping again once rendering has been issued.
  checkCudaErrors(cudaGraphicsUnmapResources(1, &cuda_pbo_resource, 0));
}
// Builds the 5-bit neighborhood index of cell x on board b.
// The cells x+2, x+1, x, x-1, x-2 (wrapping around board64_width) are packed
// from most significant to least significant bit.
// Returns a value in 0..31.
int get_neighborhood_index(unsigned long long b,int x) {
  const int width = board64_width;
  int index = 0;
  for (int offset = 2; offset >= -2; --offset) {
    // Adding width before the modulo keeps the position non-negative for x-1, x-2.
    const int cell = (x + offset + width) % width;
    index = (index << 1) | (int)((b >> cell) & 1);
  }
  return index;
}
void mousedrawFunc(int button, int state,int x,int y) {
  float ar = imageH;
  ar = ar / imageW;
  int w = board64_width;
  int h = (int) (w*ar);
  double fx = x;
  double fy = y;
  fx = fx / ((double) imageW);
  fy = fy / ((double) imageH);
  //fy = 1.0-fy;
  fx = fx * w;fy = fy * h;
  int x2 = (int)fx;int y2 = (int)fy;
  unsigned long long board65 = board64;

  if (state == GLUT_DOWN) {
    if (y2 == 0) {
      board64 = board64 ^ (1l << x2);
    } else {
      int i = get_neighborhood_index(board65,x2);
      h_ruletable = h_ruletable ^ (1 << i);
      display_hex_code();
    }
    glutPostRedisplay();
  }
}
// Applies the rule table h_ruletable once to board b and returns the next
// generation. Only the low board64_width bits are produced: each cell's new
// state is the rule-table bit selected by its neighborhood index.
unsigned long long calc_next_gen(unsigned long long b) {
  const int cells = board64_width;
  unsigned long long next = 0;
  for (int pos = 0; pos < cells; ++pos) {
    const int idx = get_neighborhood_index(b, pos);
    const unsigned long long state = (h_ruletable >> idx) & 1;
    next |= state << pos;
  }
  return next;
}
// Sets the current GL color for a preview cell in state i.
// State 0 takes bits 1, 3 and 5 of selch as red, green and blue; state 1 takes
// the same bits of the complement of selch. Any other value of i leaves the
// current color untouched.
void use_pal_color(int i) {
  if (i != 0 && i != 1) {
    return;
  }
  const int bits = (i == 0) ? selch : ~selch;
  const int red = (bits >> 1) & 1;
  const int green = (bits >> 3) & 1;
  const int blue = (bits >> 5) & 1;
  glColor3f(red, green, blue);
}
void displayRT(int w,int h) {
  unsigned long long board65 = board64;
  glBegin(GL_QUADS);
  for (int y = (h-1);y >= (h-2);y--) {
    for (int x = 0;x < w;x++) {
      use_pal_color((board65 >> x) & 1);
      //if (((x+y) & 1) == 0) {
      glVertex2f((x+0.0)/w,(y+0.0)/h);
      glVertex2f((x+0.0)/w,(y+1.0)/h);
      glVertex2f((x+1.0)/w,(y+1.0)/h);
      glVertex2f((x+1.0)/w,(y+0.0)/h);
    }
    board65 = calc_next_gen(board65);
  }  
  glEnd();
}
// Draws one frame: optionally the two preview rows (displayRT), then the
// CUDA-rendered image as a textured quad, and finally swaps buffers.
// When the preview rows are shown, the textured quad and its texture
// coordinates are cut short by the height of those two rows.
void displayCA() {
    const int cols = board64_width;
    const float aspect = (float)imageH / imageW;
    const int rows = (int) (cols * aspect);

    // Fraction of the unit square reserved for the preview rows.
    float reserved = 0.0f;

    glEnable(GL_TEXTURE_2D);
    if (ruletable_visible) {
      displayRT(cols, rows);
      reserved = 2.0f / rows;
    }

    // Let CUDA fill the PBO, then copy it into the texture.
    renderImage(true);
    glBindTexture(GL_TEXTURE_2D, gl_Tex);
    glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, imageW, imageH, GL_RGBA, GL_UNSIGNED_BYTE, BUFFER_DATA(0));

    glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, gl_Shader);
    glEnable(GL_FRAGMENT_PROGRAM_ARB);
    glDisable(GL_DEPTH_TEST);

    const float top = 1.0f - reserved;
    glBegin(GL_QUADS);
    glTexCoord2f(0.0f, 0.0f);
    glVertex2f(0.0f, 0.0f);
    glTexCoord2f(1.0f, 0.0f);
    glVertex2f(1.0f, 0.0f);
    glTexCoord2f(1.0f, top);
    glVertex2f(1.0f, top);
    glTexCoord2f(0.0f, top);
    glVertex2f(0.0f, top);
    glEnd();

    glBindTexture(GL_TEXTURE_2D, 0);
    glDisable(GL_FRAGMENT_PROGRAM_ARB);
    glutSwapBuffers();
}
// GLUT display callback: every frame is drawn by displayCA.
void displayFunc(void) {
    displayCA();
}
// (Re)creates the host buffer, the texture and the CUDA-registered PBO for a
// w x h RGBA image, releasing the previous set first. The fragment program is
// compiled on first use (or again after a failed compile).
//   w, h - image size in pixels. When both are 0 (minimized window) the old
//          objects are released and nothing new is created.
// The PBO is left bound to GL_PIXEL_UNPACK_BUFFER_ARB on return.
// Exits the process if the host buffer cannot be allocated;
// checkCudaErrors handles a failing PBO registration.
void initOpenGLBuffers(int w, int h)
{
    // delete old buffers
    if (h_Src) {
        free(h_Src);
        h_Src = 0;
    }

    if (gl_Tex) {
        glDeleteTextures(1, &gl_Tex);
        gl_Tex = 0;
    }
    if (gl_PBO) {
        cudaGraphicsUnregisterResource(cuda_pbo_resource);
        // Drop the stale handle so it cannot be unregistered a second time.
        cuda_pbo_resource = NULL;
        glDeleteBuffers(1, &gl_PBO);
        gl_PBO = 0;
    }

    // check for minimized window
    // NOTE(review): only the case where BOTH dimensions are 0 is treated as
    // minimized; confirm whether a single zero dimension should return too.
    if ((w==0) && (h==0)) {
        return;
    }

    // allocate new buffers
    // Computed in size_t so large windows cannot overflow an int product.
    const size_t num_bytes = (size_t)w * (size_t)h * 4;
    // Zero-filled so the texture and PBO start from defined (black) pixels
    // rather than whatever the heap happened to contain.
    h_Src = (uchar4*)calloc(num_bytes, 1);
    if (!h_Src && num_bytes != 0) {
        fprintf(stderr, "error: out of memory allocating %dx%d image buffer\n", w, h);
        exit(EXIT_FAILURE);
    }

    // Texture that receives the PBO contents every frame.
    glEnable(GL_TEXTURE_2D);
    glGenTextures(1, &gl_Tex);
    glBindTexture(GL_TEXTURE_2D, gl_Tex);
    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP);
    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP);
    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
    glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA8, w, h, 0, GL_RGBA, GL_UNSIGNED_BYTE, h_Src);

    // Pixel buffer object shared with CUDA.
    glGenBuffers(1, &gl_PBO);
    glBindBuffer(GL_PIXEL_UNPACK_BUFFER_ARB, gl_PBO);
    glBufferData(GL_PIXEL_UNPACK_BUFFER_ARB, num_bytes, h_Src, GL_STREAM_COPY);
    //While a PBO is registered to CUDA, it can't be used
    //as the destination for OpenGL drawing calls.
    //But in our particular case OpenGL is only used
    //to display the content of the PBO, specified by CUDA kernels,
    //so we need to register/unregister it only once.
    checkCudaErrors(cudaGraphicsGLRegisterBuffer(&cuda_pbo_resource, gl_PBO, cudaGraphicsMapFlagsWriteDiscard));

    // load shader program
    // The program does not depend on the window size. Compiling it on every
    // reshape without deleting the previous one leaked a program object per
    // resize, so it is only compiled while no valid program exists.
    if (!gl_Shader) {
        gl_Shader = compileASMShader(GL_FRAGMENT_PROGRAM_ARB, shader_code);
    }
}
// Releases the host buffer, the CUDA registration and the GL objects created
// by initOpenGLBuffers. Registered with atexit in main.
// Every handle is reset after release, so a second call does nothing.
void cleanup() {
    if (h_Src) {
        free(h_Src);
        h_Src = 0;
    }

    // gl_PBO is non-zero exactly while a PBO exists and is registered with
    // CUDA. initOpenGLBuffers zeroes it after unregistering (minimized window),
    // and it is still zero if no reshape ever happened. Unregistering
    // unconditionally would hand CUDA a stale or never-initialized handle.
    if (gl_PBO) {
        cudaGraphicsUnregisterResource(cuda_pbo_resource);
        cuda_pbo_resource = NULL;
        glBindBuffer(GL_PIXEL_UNPACK_BUFFER_ARB, 0);
        glDeleteBuffers(1, &gl_PBO);
        gl_PBO = 0;
    }
    if (gl_Tex) {
        glDeleteTextures(1, &gl_Tex);
        gl_Tex = 0;
    }
    if (gl_Shader) {
        glDeleteProgramsARB(1, &gl_Shader);
        gl_Shader = 0;
    }
}

// Initializes GLUT and opens the double-buffered RGBA window.
//   argc, argv - the program arguments, forwarded to glutInit; argv[0] is
//                used as the initial window title.
// The window starts at imageW x imageH pixels at screen position (0, 0).
// No OpenGL extension availability check is performed here.
void initGL(int *argc, char **argv)
{
    printf("Initializing GLUT...\n");

    glutInit(argc, argv);
    glutInitDisplayMode(GLUT_RGBA | GLUT_DOUBLE);
    glutInitWindowSize(imageW, imageH);
    glutInitWindowPosition(0, 0);
    glutCreateWindow(argv[0]);

    printf("OpenGL window created.\n");
}
void initData() {
    int dev = 0;
    cudaDeviceProp deviceProp;
    cudaGetDeviceProperties(&deviceProp, dev);
    version = deviceProp.major*10 + deviceProp.minor;
    printf("version: %d \n", version);
    numSMs = deviceProp.multiProcessorCount;
}
// Generations run since time23 last changed; reset by timerEvent whenever the
// wall-clock second changes.
int generations_per_second;
//int time1;
// Last value of time(NULL) seen by timerEvent (stored as int).
int time23;
// Countdown of timer ticks until the next generation when cell_auto_speed is
// negative (slow mode in timerEvent).
int generation_frames_remaining = 1;
// GLUT timer callback: advances the automaton, requests a redraw and re-arms
// itself after REFRESH_DELAY milliseconds.
//   cell_auto_speed >= 0 : run 1 << cell_auto_speed generations per tick and
//                          maintain the generations-per-second counter.
//   cell_auto_speed <  0 : run one generation every 1 << -cell_auto_speed ticks.
//   value - timer id supplied by GLUT (not used).
void timerEvent(int value) {
  if (cell_auto_speed < 0) {
    // Slow mode: count ticks down and step once when the counter reaches zero.
    generation_frames_remaining -= 1;
    if (generation_frames_remaining == 0) {
      generation_frames_remaining = 1 << -cell_auto_speed;
      run_cellauto(numSMs);
    }
  } else {
    // Fast mode: several generations per tick.
    int steps_left = 1 << cell_auto_speed;
    while (steps_left-- > 0) {
      run_cellauto(numSMs);
      generations_per_second++;
      // Start a new counting window whenever the wall-clock second changes.
      if (time(NULL) != time23) {
        generations_per_second = 0;
        time23 = time(NULL);
      }
    }
  }

  glutPostRedisplay();
  glutTimerFunc(REFRESH_DELAY, timerEvent, 1);
}
// Digit lookup used by display_hex_code: index 0..15 -> uppercase hex character.
const char *hex_char = "0123456789ABCDEF";
// Writes the name of today's log file into buffer: the local date formatted
// with "%F" followed by ".txt".
//   buffer - destination; must provide room for at least 80 characters.
void get_log_filename34(char *buffer) {
  const time_t now = time(NULL);
  const struct tm *local_now = localtime(&now);
  strftime(buffer, 80, "%F.txt", local_now);
}
// Formats h_ruletable as 8 uppercase hex digits, appends that line to today's
// log file (see get_log_filename34) and shows it as the window title.
// If the log file cannot be opened, the reason is reported on stderr and the
// title is still updated.
void display_hex_code() {
  char hex_code[9];
  // Unsigned so that shifting the remaining bits down is well defined.
  unsigned int bits = h_ruletable;
  hex_code[8] = 0;
  // Fill from the least significant digit backwards.
  for (int i = 7;i >= 0;i--) {
    hex_code[i] = hex_char[bits & 15];
    bits = bits >> 4;
  }

  char log_filename34[80];
  get_log_filename34(log_filename34);
  FILE *f = fopen(log_filename34,"a");
  if (f != NULL) {
    fprintf(f,"%s\n",hex_code);
    fclose(f);
  } else {
    // e.g. read-only working directory: logging is best effort and must not
    // crash the viewer by passing NULL to fprintf/fclose.
    perror(log_filename34);
  }
  glutSetWindowTitle(hex_code);
}
//const char *color_keyboard = "asdzxc";
//const char *rt_keyboard = "1qaz2wsx3edc4rfv";  
//const char *rt_keyboard = ",ki8mju7nhy6bgt5vfr4cde3xsw2zaq1";
//int sel_color = 1;
// Keys that toggle bits 0..5 of selch (either letter case is accepted).
const char *color_keyboard = "qawsed";

// GLUT keyboard callback for ordinary keys.
//   q a w s e d : toggle bit 0..5 of selch (upper or lower case)
//   Enter (13)  : show / hide the rule-table preview rows
//   c           : random_colors()
//   z           : advance board64 by one generation
//   ] / [       : raise / lower cell_auto_speed within [-30, 30]
//   `           : XOR h_ruletable with rand() and log the new rule
//   space       : re-seed the automaton with the current time
// A redraw is requested after every key press.
//   x, y - cursor position supplied by GLUT (not used).
void keyboardFunc(unsigned char key, int x, int y) {
  for (int bit = 0; bit < 6; ++bit) {
    const char wanted = color_keyboard[bit];
    // key ^ 32 flips the ASCII case bit, so both cases match.
    if (key == wanted || (key ^ 32) == wanted) {
      selch ^= (1 << bit);
    }
  }

  switch (key) {
    case 13:
      ruletable_visible = !ruletable_visible;
      break;
    case 'c':
      random_colors();
      break;
    case 'z':
      board64 = calc_next_gen(board64);
      break;
    case ']':
      if (cell_auto_speed < 30) {
        cell_auto_speed += 1;
      }
      break;
    case '[':
      if (cell_auto_speed > -30) {
        cell_auto_speed -= 1;
      }
      break;
    case '`':
      h_ruletable ^= rand();
      display_hex_code();
      break;
    case ' ':
      init_cellauto(time(0));
      break;
    default:
      break;
  }

  glutPostRedisplay();
}
// Parses the hexadecimal digits contained in hexVal and returns their value.
// Upper- and lower-case digits are accepted; every other character (for
// example the 'x' of a "0x" prefix) is skipped without contributing a digit.
// Arithmetic is done in unsigned int, so if more digits are given than fit,
// the result keeps only the low-order part of the value.
unsigned int hexadecimalToDecimal(char hexVal[])
{
  unsigned int value = 0;

  for (const char *p = hexVal; *p != '\0'; ++p) {
    const char ch = *p;
    unsigned int digit;

    if (ch >= '0' && ch <= '9') {
      digit = (unsigned int)(ch - '0');
    } else if (ch >= 'A' && ch <= 'F') {
      digit = (unsigned int)(ch - 'A') + 10u;
    } else if (ch >= 'a' && ch <= 'f') {
      digit = (unsigned int)(ch - 'a') + 10u;
    } else {
      continue;  // not a hex digit: ignore it
    }

    value = value * 16u + digit;
  }

  return value;
}
// GLUT special-key callback: key code 100 (left) and 102 (right) move
// h_offsetx by 64 in the corresponding direction. A redraw is requested for
// every special key.
//   x, y - cursor position supplied by GLUT (not used).
void keyboardFunc2(int key, int x, int y) {
  const int scroll_step = 1 << 6;

  switch (key) {
    case 100:  // left
      h_offsetx -= scroll_step;
      break;
    case 102:  // right
      h_offsetx += scroll_step;
      break;
    default:
      break;
  }

  glutPostRedisplay();
}
// Program entry point.
// Sets up GLUT and the CUDA device info, creates and seeds the board, selects
// the rule table (first command-line argument as hex, otherwise the built-in
// default), installs the GLUT callbacks and enters the main loop.
// Returns -1 if the board cannot be created.
int main(int argc, char **argv)
{
  initGL(&argc, argv);
  initData();

  if (!create_board23()) {
    printf("error: out of memory\n");
    return -1;
  }

  random_colors();
  init_cellauto(time(0));

  // Default rule unless one is supplied on the command line.
  h_ruletable = (argc > 1) ? hexadecimalToDecimal(argv[1]) : 0x3CFC3CFC;

  // Callback registration.
  glutDisplayFunc(displayFunc);
  glutReshapeFunc(reshapeFunc);
  glutTimerFunc(REFRESH_DELAY, timerEvent, 0);
  glutKeyboardFunc(keyboardFunc);
  glutSpecialFunc(keyboardFunc2);
  glutMouseFunc(mousedrawFunc);

  // Log the starting rule and show it in the title bar.
  display_hex_code();

  atexit(cleanup);
  glutMainLoop();
  exit(EXIT_SUCCESS);
}

