/*--------------------------------------------------------------------
 * FILE:
 *     cascade.c
 *
 * NOTE:
 *     This file contains the functions that pgreplicate calls to
 *     handle backup and cascade replication.
 *
 * Portions Copyright (c) 2003, Atsushi Mitani
 *--------------------------------------------------------------------
 */
#include "unistd.h"
#include "postgres.h"
#include "postgres_fe.h"

#include <stdio.h>
#include <sys/types.h>
#include <fcntl.h>
#include <errno.h>
#include <ctype.h>
#include <time.h>
#include <sys/ipc.h>
#include <sys/shm.h>
#include <sys/sem.h>
#include <signal.h>
#include <sys/socket.h>
#include <unistd.h>
#include <netdb.h>
#ifdef HAVE_NETINET_TCP_H
#include <netinet/tcp.h>
#endif
#include <dirent.h>
#include <arpa/inet.h>
#ifdef HAVE_CRYPT_H
#include <crypt.h>
#endif

#ifdef MULTIBYTE
#include "mb/pg_wchar.h"
#endif

#include "libpq-fe.h"
#include "libpq-int.h"
#include "fe-auth.h"
#include "access/xact.h"
#include "replicate_com.h"
#include "pgreplicate.h"

/* Cascade functions with external linkage (defined later in this file). */
int PGRstartup_cascade(void);
int PGRsend_lower_cascade(ReplicateHeader * header, char * query);
int PGRsend_upper_cascade(ReplicateHeader * header, char * query);
ReplicateServerInfo * PGRget_lower_cascade(void);
ReplicateServerInfo * PGRget_upper_cascade(void);
void PGRset_cascade_server_status(ReplicateServerInfo * cascade, int status);
ReplicateServerInfo * PGRrecv_cascade_answer(ReplicateServerInfo * cascade,ReplicateHeader * header);
int PGRsend_cascade(ReplicateServerInfo * cascade, ReplicateHeader * header, char * query);
int PGRcascade_main(int sock, ReplicateHeader * header, char * query);

/* Helpers private to this file. */
static int get_cascade_data(ReplicateServerInfo * buf, int flag);
static int add_cascade_data(ReplicateHeader * header, ReplicateServerInfo * add_data);
static int update_cascade_data(ReplicateHeader * header, ReplicateServerInfo * update_data);
static void write_cascade_status_file(ReplicateServerInfo * cascade);
static int notice_cascade_data(int sock);

/*--------------------------------------
 * FUNCTION DEFINITIONS
 *--------------------------------------
 */

#if 0
/*
 * count_cascade()
 *
 * NOTE: compiled out by the surrounding "#if 0"; kept for reference only.
 *
 * Counts the cascade table entries whose useFlag is DB_TBL_USE or
 * DB_TBL_TOP.
 *
 * flag : UPPER_CASCADE - start at Cascade_Tbl and stop after myself
 *        ALL_CASCADE   - start at Cascade_Tbl and scan to DB_TBL_END
 *        LOWER_CASCADE - start at Cascade_Inf->myself
 *
 * Returns the number of matching entries, or 0 when the table or the
 * starting entry is not available.
 */
static int
count_cascade(int flag)
{
	int cnt = 0;
	int cascade_cnt = 0;
	ReplicateServerInfo * cascade = NULL;

	if ((Cascade_Tbl == NULL) || (Cascade_Inf == NULL))
	{
		return 0;
	}

	/* choose the entry where counting of cascading replication servers starts */
	switch (flag)
	{
		case UPPER_CASCADE:
		case ALL_CASCADE:
			cascade = Cascade_Tbl;
			break;
		case LOWER_CASCADE:
			cascade = Cascade_Inf->myself;
			break;
	}

	if (cascade == NULL)
	{
		return 0;
	}
	while (cascade->useFlag != DB_TBL_END)
	{
		if ((cascade->useFlag == DB_TBL_USE) ||
			(cascade->useFlag == DB_TBL_TOP))
		{
			cascade_cnt ++;
		}
		/* for UPPER_CASCADE, myself is the last entry visited */
		if ((flag == UPPER_CASCADE) &&
			(cascade == Cascade_Inf->myself))
		{
			break;
		}
		/* never visit more than MAX_DB_SERVER - 1 entries */
		cnt ++;
		if (cnt >= MAX_DB_SERVER -1 )
		{
			break;
		}
		cascade ++;
	}
	return cascade_cnt;
}
#endif

/*
 * get_cascade_data()
 *
 * Collect the active entries (useFlag DB_TBL_USE or DB_TBL_TOP) of the
 * cascade table, converted to network byte order.
 *
 * buf  : caller-owned array with room for MAX_DB_SERVER entries that
 *        receives the converted entries; it is zero-filled first.
 *        May be NULL, in which case the entries are only counted.
 * flag : UPPER_CASCADE - scan from the top of Cascade_Tbl through myself
 *        ALL_CASCADE   - scan from the top of Cascade_Tbl to DB_TBL_END
 *        LOWER_CASCADE - scan from Cascade_Inf->myself to DB_TBL_END
 *
 * Returns the number of active entries found; 0 when the cascade data is
 * not initialized or flag is not one of the values above.
 *
 * This function used to overwrite its by-value `buf' argument with a
 * private malloc().  The caller could never see that allocation, so the
 * collected data was lost and the memory leaked on every call.  The
 * buffer is therefore owned by the caller.
 */
static int
get_cascade_data(ReplicateServerInfo * buf, int flag)
{
	int cnt = 0;
	int i = 0;
	ReplicateServerInfo * cascade = NULL;

	if ((Cascade_Tbl == NULL) || (Cascade_Inf == NULL))
	{
		return 0;
	}

	switch (flag)
	{
		case UPPER_CASCADE:
		case ALL_CASCADE:
			cascade = Cascade_Tbl;
			break;
		case LOWER_CASCADE:
			cascade = Cascade_Inf->myself;
			break;
		default:
			break;
	}

	if (cascade == NULL)
	{
		return 0;
	}
	if (buf != NULL)
	{
		memset(buf,0,sizeof(ReplicateServerInfo) * MAX_DB_SERVER);
	}

	while (cascade->useFlag != DB_TBL_END)
	{
		if ((cascade->useFlag == DB_TBL_USE) ||
			(cascade->useFlag == DB_TBL_TOP))
		{
			if (buf != NULL)
			{
				(buf + i)->useFlag = htonl(cascade->useFlag);
				memcpy((buf + i)->hostName,cascade->hostName,sizeof(cascade->hostName));
				(buf + i)->portNumber = htons(cascade->portNumber);
				(buf + i)->recoveryPortNumber = htons(cascade->recoveryPortNumber);
			}
			i++;
		}
		/* for UPPER_CASCADE, myself is the last entry reported */
		if ((flag == UPPER_CASCADE) &&
			(cascade == Cascade_Inf->myself))
		{
			break;
		}
		/* never visit more than MAX_DB_SERVER - 1 entries */
		cnt ++;
		if (cnt >= MAX_DB_SERVER -1 )
		{
			break;
		}
		cascade ++;
	}
	return i;
}

/*
 * update_cascade_data()
 *
 * Replace the whole cascade table (Cascade_Tbl) with the entries received
 * in update_data and re-locate this server's own entry.
 *
 * header      : packet header; query_size (network byte order) gives the
 *               payload size in bytes
 * update_data : array of ReplicateServerInfo in network byte order
 *
 * Any open upper/lower sockets are closed and Cascade_Inf->upper/lower/
 * myself are reset.  myself is set to the entry whose host name and both
 * port numbers match this process.
 *
 * Returns STATUS_OK, or STATUS_ERROR when the arguments or the payload
 * size are invalid, the cascade data is not initialized, or the local
 * host name cannot be obtained.  On error the table is left untouched.
 */
static int
update_cascade_data(ReplicateHeader * header, ReplicateServerInfo * update_data)
{
	char * func = "update_cascade_data()";
	int size = 0;
	int cnt = 0;
	ReplicateServerInfo * ptr = NULL;
	ReplicateServerInfo * cascade = NULL;
	char hostName[HOSTNAME_MAX_LENGTH];

	if ((header == NULL ) || ( update_data == NULL))
	{
		show_error("%s:receive data is wrong",func);
		return STATUS_ERROR;
	}
	if ((Cascade_Tbl == NULL) || (Cascade_Inf == NULL))
	{
		show_error("%s:config data read error",func);
		return STATUS_ERROR;
	}
	size = ntohl(header->query_size);
	if (size < 0)
	{
		/* a size above INT_MAX on the wire would otherwise yield a bogus count */
		show_error("%s:receive data is wrong",func);
		return STATUS_ERROR;
	}
	cnt = size / sizeof(ReplicateServerInfo);
	if (cnt >= MAX_DB_SERVER)
	{
		show_error("%s:update cascade data is too large. it's more than %d", func,MAX_DB_SERVER);
		return STATUS_ERROR;
	}

	/*
	 * gethostname() need not NUL-terminate a truncated name, so keep the
	 * last byte as a terminator.  Resolve the name before touching any
	 * state so that a failure leaves the current table usable.
	 */
	memset(hostName,0,sizeof(hostName));
	if (gethostname(hostName,sizeof(hostName) - 1) != 0)
	{
		show_error("%s:gethostname failed: (%s)",func,strerror(errno));
		return STATUS_ERROR;
	}

	Cascade_Inf->useFlag = DB_TBL_INIT;
	if ((Cascade_Inf->upper != NULL) && (Cascade_Inf->upper->sock > 0))
	{
		close(Cascade_Inf->upper->sock);
	}
	if ((Cascade_Inf->lower != NULL) && (Cascade_Inf->lower->sock > 0))
	{
		close(Cascade_Inf->lower->sock);
	}
	Cascade_Inf->myself = NULL;
	Cascade_Inf->upper = NULL;
	Cascade_Inf->lower = NULL;

	ptr = update_data;
	cascade = Cascade_Tbl;
	memset(cascade,0,(sizeof(ReplicateServerInfo)*MAX_DB_SERVER));
	/* keep the table terminated even when the payload holds no entry */
	cascade->useFlag = DB_TBL_END;
	while (cnt > 0)
	{
		cascade->useFlag = ntohl(ptr->useFlag);
		memcpy(cascade->hostName,ptr->hostName,sizeof(cascade->hostName));
		cascade->portNumber = ntohs(ptr->portNumber);
		cascade->recoveryPortNumber = ntohs(ptr->recoveryPortNumber);

		if ((!strncmp(cascade->hostName,hostName,sizeof(cascade->hostName)))  &&
			(cascade->portNumber == Port_Number) &&
			(cascade->recoveryPortNumber == Recovery_Port_Number))
		{
			Cascade_Inf->myself = cascade;
		}
		/* the slot after the last stored entry always carries the end mark */
		cascade ++;
		cascade->useFlag = DB_TBL_END;
		ptr ++;
		cnt --;
	}
	Cascade_Inf->useFlag = DB_TBL_USE;

	return STATUS_OK;
}

/*
 * add_cascade_data()
 *
 * Store the entries received from a lower server directly below this
 * server's own entry (Cascade_Inf->myself) in the cascade table,
 * replacing whatever was stored there.  Entries that describe this
 * server itself are skipped.
 *
 * header   : packet header; query_size (network byte order) gives the
 *            payload size in bytes
 * add_data : array of ReplicateServerInfo in network byte order
 *
 * An open lower socket is closed and Cascade_Inf->lower is reset.
 *
 * Returns STATUS_OK, or STATUS_ERROR when the arguments or the payload
 * size are invalid, the cascade data is not initialized, the entries
 * would not fit into the table, or the local host name cannot be
 * obtained.  On error the table is left untouched.
 */
static int
add_cascade_data(ReplicateHeader * header, ReplicateServerInfo * add_data)
{
	char * func = "add_cascade_data()";
	int size = 0;
	int cnt = 0;
	long offset = 0;
	ReplicateServerInfo * ptr = NULL;
	ReplicateServerInfo * cascade = NULL;
	char hostName[HOSTNAME_MAX_LENGTH];

	if ((header == NULL ) || ( add_data == NULL))
	{
		show_error("%s:receive data is wrong",func);
		return STATUS_ERROR;
	}
	if ((Cascade_Tbl == NULL) || (Cascade_Inf == NULL))
	{
		show_error("%s:config data read error",func);
		return STATUS_ERROR;
	}
	if (Cascade_Inf->myself == NULL)
	{
		/* without our own entry there is no position to append to */
		show_error("%s:cascade data initialize error",func);
		return STATUS_ERROR;
	}
	size = ntohl(header->query_size);
	if (size < 0)
	{
		/* a size above INT_MAX on the wire would otherwise yield a bogus count */
		show_error("%s:receive data is wrong",func);
		return STATUS_ERROR;
	}
	cnt = size / sizeof(ReplicateServerInfo);
	if (cnt >= MAX_DB_SERVER)
	{
		show_error("%s:addtional cascade data is too large. it's more than %d", func,MAX_DB_SERVER);
		return STATUS_ERROR;
	}
	/*
	 * The entries go after myself, followed by one end mark; all of that
	 * has to stay inside the MAX_DB_SERVER slots of Cascade_Tbl.
	 * (Assumes myself points into Cascade_Tbl, as it does whenever it is
	 * set by update_cascade_data().)
	 */
	offset = (long)(Cascade_Inf->myself - Cascade_Tbl);
	if ((offset < 0) || (offset + 1 + cnt >= MAX_DB_SERVER))
	{
		show_error("%s:addtional cascade data is too large. it's more than %d", func,MAX_DB_SERVER);
		return STATUS_ERROR;
	}

	/*
	 * gethostname() need not NUL-terminate a truncated name, so keep the
	 * last byte as a terminator.
	 */
	memset(hostName,0,sizeof(hostName));
	if (gethostname(hostName,sizeof(hostName) - 1) != 0)
	{
		show_error("%s:gethostname failed: (%s)",func,strerror(errno));
		return STATUS_ERROR;
	}

	Cascade_Inf->useFlag = DB_TBL_INIT;
	if ((Cascade_Inf->lower != NULL) && (Cascade_Inf->lower->sock > 0))
	{
		close(Cascade_Inf->lower->sock);
	}
	Cascade_Inf->lower = NULL;

	ptr = add_data;
	cascade = Cascade_Inf->myself;
	cascade ++;
	while (cnt > 0)
	{
		cascade->useFlag = ntohl(ptr->useFlag);
		memcpy(cascade->hostName,ptr->hostName,sizeof(cascade->hostName));
		cascade->portNumber = ntohs(ptr->portNumber);
		cascade->recoveryPortNumber = ntohs(ptr->recoveryPortNumber);

		if ((!strncmp(cascade->hostName,hostName,sizeof(cascade->hostName)))  &&
			(cascade->portNumber == Port_Number) &&
			(cascade->recoveryPortNumber == Recovery_Port_Number))
		{
			/*
			 * Our own entry: do not keep it.  Re-mark the slot as the
			 * end of the table so that a trailing self entry is not
			 * left behind as a live lower server.
			 */
			cascade->useFlag = DB_TBL_END;
			ptr ++;
			cnt --;
			continue;
		}
		cascade ++;
		cascade->useFlag = DB_TBL_END;
		ptr ++;
		cnt --;
	}
	Cascade_Inf->useFlag = DB_TBL_USE;

	return STATUS_OK;
}

int
PGRstartup_cascade(void)
{
	char * func = "PGRstartup_cascade()";
	int cnt = 0;
	int status = STATUS_OK;
	ReplicateHeader header;
	ReplicateServerInfo * cascade = NULL;
	ReplicateServerInfo * buf = NULL;

	if ((Cascade_Tbl == NULL) || (Cascade_Inf == NULL))
	{
		show_error("%s:config data read error",func);
		return STATUS_ERROR;
	}

	/* count lower server */
	cascade = Cascade_Inf->myself;
	if (cascade == NULL)
	{
		show_error("%s:cascade data initialize error",func);
		return STATUS_ERROR;
	}
	cnt = get_cascade_data(buf,LOWER_CASCADE);
	if (cnt <= 0)
	{
		show_error("%s:cascade data get error",func);
		return STATUS_ERROR;
	}
	
	memset(&header,0,sizeof(ReplicateHeader));
	header.cmdSys = CMD_SYS_CASCADE;
	header.cmdSts = CMD_STS_TO_UPPER;
	header.cmdType = CMD_TYPE_ADD;
	header.query_size = htonl(sizeof(ReplicateServerInfo) * cnt);

	status = PGRsend_upper_cascade(&header, (char *)buf);
	if (buf != NULL)
	{
		free(buf);
	}
	if (status == STATUS_OK)
	{
		memset(&header,0,sizeof(ReplicateHeader));
		buf = PGRrecv_cascade_answer( Cascade_Inf->upper, &header);
		if ((buf != NULL) &&
			(header.cmdSys == CMD_SYS_CASCADE) &&
			(header.cmdSts == CMD_STS_TO_LOWER) &&
			(header.cmdType == CMD_TYPE_UPDATE_ALL))
		{
			status = update_cascade_data(&header,buf);
			free(buf);
		}
	}
	
	return status;
}

/*
 * PGRsend_lower_cascade()
 *
 * Send a packet (header + query) to the lower cascade server.
 *
 * The cached peer in Cascade_Inf->lower is used when its socket is open.
 * Otherwise, or when sending fails, the next usable lower server is
 * looked up with PGRget_lower_cascade() and connected; servers that
 * cannot be connected or written to are marked DB_TBL_ERROR and skipped.
 *
 * Returns STATUS_OK once the packet has been sent.  Returns STATUS_ERROR
 * when no lower server is left (PGR_Cascade is then set to false) or
 * when the cascade data is not initialized.
 */
int
PGRsend_lower_cascade(ReplicateHeader * header, char * query)
{
	ReplicateServerInfo * lower = NULL;

	if (Cascade_Inf == NULL)
	{
		return STATUS_ERROR;
	}

	for(;;)
	{
		/* a cached peer without an open socket is of no use */
		if ((Cascade_Inf->lower != NULL) && (Cascade_Inf->lower->sock <= 0))
		{
			Cascade_Inf->lower = NULL;
		}
		if (Cascade_Inf->lower == NULL)
		{
			lower = PGRget_lower_cascade();
			if (lower == NULL)
			{
				PGR_Cascade = false;
				return STATUS_ERROR;
			}
			if (PGR_Create_Socket_Connect(&(lower->sock),lower->hostName,lower->portNumber) != STATUS_OK)
			{
				/* marked as failed, so the next lookup skips it */
				PGRset_cascade_server_status(lower,DB_TBL_ERROR);
				continue;
			}
			Cascade_Inf->lower = lower;
		}

		lower = Cascade_Inf->lower;
		if (PGRsend_cascade(lower,header,query) == STATUS_OK)
		{
			return STATUS_OK;
		}

		/*
		 * Sending failed: give up this peer and let the loop pick the
		 * next one.  (Retrying here with the peer already reset to NULL
		 * would dereference a NULL pointer.)
		 */
		if (lower->sock > 0)
		{
			close(lower->sock);
		}
		lower->sock = -1;
		PGRset_cascade_server_status(lower,DB_TBL_ERROR);
		Cascade_Inf->lower = NULL;
	}
}

/*
 * PGRsend_upper_cascade()
 *
 * Send a packet (header + query) to the upper cascade server.
 *
 * The cached peer in Cascade_Inf->upper is used when its socket is open.
 * Otherwise, or when sending fails, the next usable upper server is
 * looked up with PGRget_upper_cascade() and connected; servers that
 * cannot be connected or written to are marked DB_TBL_ERROR and skipped.
 *
 * Returns STATUS_OK once the packet has been sent.  Returns STATUS_ERROR
 * when no upper server is available or when the cascade data is not
 * initialized.
 */
int
PGRsend_upper_cascade(ReplicateHeader * header, char * query)
{
	ReplicateServerInfo * upper = NULL;

	if (Cascade_Inf == NULL)
	{
		return STATUS_ERROR;
	}

	for(;;)
	{
		/* a cached peer without an open socket is of no use */
		if ((Cascade_Inf->upper != NULL) && (Cascade_Inf->upper->sock <= 0))
		{
			Cascade_Inf->upper = NULL;
		}
		if (Cascade_Inf->upper == NULL)
		{
			upper = PGRget_upper_cascade();
			if (upper == NULL)
			{
				return STATUS_ERROR;
			}
			if (PGR_Create_Socket_Connect(&(upper->sock),upper->hostName,upper->portNumber) != STATUS_OK)
			{
				/* marked as failed, so the next lookup skips it */
				PGRset_cascade_server_status(upper,DB_TBL_ERROR);
				continue;
			}
			Cascade_Inf->upper = upper;
		}

		upper = Cascade_Inf->upper;
		if (PGRsend_cascade(upper,header,query) == STATUS_OK)
		{
			return STATUS_OK;
		}

		/*
		 * Sending failed: give up this peer and let the loop pick the
		 * next one.  (Retrying here with the peer already reset to NULL
		 * would dereference a NULL pointer.)
		 */
		if (upper->sock > 0)
		{
			close(upper->sock);
		}
		upper->sock = -1;
		PGRset_cascade_server_status(upper,DB_TBL_ERROR);
		Cascade_Inf->upper = NULL;
	}
}

/*
 * PGRget_lower_cascade()
 *
 * Find the first cascade table entry after this server's own entry whose
 * useFlag is DB_TBL_USE.  The scan ends at the DB_TBL_END entry.
 *
 * Returns the entry found, or NULL when there is none or the cascade
 * data is not initialized.
 */
ReplicateServerInfo *
PGRget_lower_cascade(void)
{
	char * func = "PGRget_lower_cascade()";
	ReplicateServerInfo * entry = NULL;

	if ((Cascade_Tbl == NULL) || (Cascade_Inf == NULL))
	{
		show_error("%s:config data read error",func);
		return NULL;
	}
	if (Cascade_Inf->myself == NULL)
	{
		show_error("%s:cascade data initialize error",func);
		return NULL;
	}

	/* walk downwards, starting right below myself */
	for (entry = Cascade_Inf->myself + 1; entry->useFlag != DB_TBL_END; entry ++)
	{
		if (entry->useFlag == DB_TBL_USE)
		{
			return entry;
		}
	}
	return NULL;
}

/*
 * PGRget_upper_cascade()
 *
 * Find the nearest cascade table entry before this server's own entry
 * whose useFlag is DB_TBL_USE.
 *
 * The backward scan stops at a DB_TBL_TOP entry and at the first slot of
 * Cascade_Tbl.  The latter bound was missing: with no DB_TBL_TOP entry
 * above myself (for instance after that entry had been re-marked
 * DB_TBL_ERROR) the scan ran past the beginning of the table.
 *
 * NOTE(review): as before, a DB_TBL_TOP entry ends the search and is
 * itself never returned as the upper server - confirm this is intended.
 *
 * Returns the entry found, or NULL when this server is the top, when no
 * usable upper server exists, or when the cascade data is not
 * initialized.
 */
ReplicateServerInfo *
PGRget_upper_cascade(void)
{
	char * func = "PGRget_upper_cascade()";
	ReplicateServerInfo * cascade = NULL;

	if ((Cascade_Tbl == NULL) || (Cascade_Inf == NULL))
	{
		show_error("%s:config data read error",func);
		return NULL;
	}

	cascade = Cascade_Inf->myself;
	if (cascade == NULL)
	{
		show_error("%s:cascade data initialize error",func);
		return NULL;
	}
	if (cascade->useFlag == DB_TBL_TOP)
	{
		return NULL;
	}

	/* walk upwards, never stepping in front of the first table slot */
	while (cascade > Cascade_Tbl)
	{
		cascade --;
		if (cascade->useFlag == DB_TBL_TOP)
		{
			break;
		}
		if (cascade->useFlag == DB_TBL_USE)
		{
			return cascade;
		}
	}
	return NULL;
}

/*
 * write_cascade_status_file()
 *
 * Write one line describing the current useFlag of a cascade server
 * (host name and port number) to StatusFp.  Nothing is written for a
 * useFlag value other than FREE, INIT, USE, ERROR or TOP.
 */
static void
write_cascade_status_file(ReplicateServerInfo * cascade)
{
	char * format = NULL;

	/* pick the message that belongs to the current state */
	switch( cascade->useFlag)
	{
		case DB_TBL_FREE:
			format = "cascade(%s) port(%d) free";
			break;
		case DB_TBL_INIT:
			format = "cascade(%s) port(%d) initialize";
			break;
		case DB_TBL_USE:
			format = "cascade(%s) port(%d) start use";
			break;
		case DB_TBL_ERROR:
			format = "cascade(%s) port(%d) error";
			break;
		case DB_TBL_TOP:
			format = "cascade(%s) port(%d) become top";
			break;
	}

	if (format != NULL)
	{
		PGRwrite_log_file(StatusFp,format,
				cascade->hostName,
				cascade->portNumber);
	}
}

/*
 * PGRset_cascade_server_status()
 *
 * Store a new useFlag value in a cascade server entry.  The change is
 * recorded through write_cascade_status_file() only when the value
 * actually differs from the current one.  A NULL entry is ignored.
 */
void
PGRset_cascade_server_status(ReplicateServerInfo * cascade, int status)
{
	if ((cascade != NULL) && (cascade->useFlag != status))
	{
		cascade->useFlag = status;
		write_cascade_status_file(cascade);
	}
}

/*
 * PGRrecv_cascade_answer()
 *
 * Read one packet from the socket of the given cascade server with
 * PGRread_packet(); the packet header is stored in *header.
 *
 * Returns the value returned by PGRread_packet() cast to
 * ReplicateServerInfo *, or NULL when either argument is NULL.
 */
ReplicateServerInfo *
PGRrecv_cascade_answer(ReplicateServerInfo * cascade,ReplicateHeader * header)
{
	if ((header == NULL) || (cascade == NULL))
	{
		return NULL;
	}
	return (ReplicateServerInfo*)PGRread_packet(cascade->sock,header);
}

/*
 * PGRsend_cascade()
 *
 * Send one packet - the header followed by the query payload - to a
 * cascade server.
 *
 * cascade : destination; its sock member must be a connected socket
 * header  : packet header; query_size (network byte order) is the
 *           number of payload bytes
 * query   : payload, may be NULL only when query_size is 0
 *
 * The function waits up to 10 seconds for the socket to become writable
 * and then sends the whole packet, retrying calls interrupted by a
 * signal.
 *
 * Returns STATUS_OK when every byte has been sent.  Returns STATUS_ERROR
 * on invalid arguments, allocation failure, select() timeout or failure,
 * and send() failure.
 *
 * Changes against the previous behaviour:
 *   - a timeout or select() failure used to leak the packet buffer and
 *     report STATUS_OK although nothing had been sent;
 *   - the payload copy read query_size + 1 bytes, one byte past a binary
 *     payload;
 *   - the malloc() result was used unchecked.
 */
int
PGRsend_cascade(ReplicateServerInfo * cascade, ReplicateHeader * header, char * query)
{
	char * buf = NULL;
	size_t header_size = sizeof(ReplicateHeader);
	size_t buf_size = 0;
	size_t send_size = 0;
	int query_size = 0;
	int status = STATUS_ERROR;
	int rtn = 0;
	ssize_t s = 0;
	fd_set      wmask;
	struct timeval timeout;

	/* check parameter */
	if ((cascade == NULL) || (header == NULL))
	{
		return STATUS_ERROR;
	}
	/* FD_SET() is only defined for descriptors in [0, FD_SETSIZE) */
	if ((cascade->sock < 0) || (cascade->sock >= FD_SETSIZE))
	{
		return STATUS_ERROR;
	}
	query_size = ntohl(header->query_size);
	if ((query_size < 0) || ((query_size > 0) && (query == NULL)))
	{
		return STATUS_ERROR;
	}

	/* build the packet: header immediately followed by the payload */
	buf_size = header_size + (size_t)query_size;
	buf = malloc(buf_size);
	if (buf == NULL)
	{
		return STATUS_ERROR;
	}
	memcpy(buf,header,header_size);
	if (query_size > 0)
	{
		memcpy(buf + header_size,query,(size_t)query_size);
	}

	/*
	 * Wait for the socket to become writable.
	 */
	do
	{
		/* select() may modify both arguments, so set them up each time */
		timeout.tv_sec = 10;
		timeout.tv_usec = 0;
		FD_ZERO(&wmask);
		FD_SET(cascade->sock,&wmask);
		rtn = select(cascade->sock+1, (fd_set *)NULL, &wmask, (fd_set *)NULL, &timeout);
	} while ((rtn < 0) && (errno == EINTR));

	if ((rtn > 0) && FD_ISSET(cascade->sock, &wmask))
	{
		while (send_size < buf_size)
		{
			s = send(cascade->sock,buf + send_size,buf_size - send_size ,0);
			if (s < 0)
			{
				if (errno == EINTR)
				{
					continue;
				}
				break;
			}
			send_size += (size_t)s;
		}
		if (send_size == buf_size)
		{
			status = STATUS_OK;
		}
	}

	free(buf);
	return status;
}

static int
notice_cascade_data(int sock)
{
	char * func = "notice_cascade_data()";
	ReplicateServerInfo target;
	ReplicateServerInfo *cascade_data = NULL;
	ReplicateHeader header;
	int cnt = 0;
	int size = 0;

	if (sock <= 0)
	{
		return STATUS_ERROR;
	}
	memset(&target,0,sizeof(ReplicateServerInfo));
	target.sock = sock;

	cnt = get_cascade_data(cascade_data, ALL_CASCADE );
	if (cnt <= 0)
	{
		show_error("%s:cascade data is wrong",func);
		return STATUS_ERROR;
	}
	size = sizeof (ReplicateServerInfo) * cnt ;

	memset(&header,0,sizeof(ReplicateHeader));
	header.cmdSys = CMD_SYS_CASCADE ;
	header.cmdSts = CMD_STS_TO_LOWER ;
	header.cmdType = CMD_TYPE_UPDATE_ALL;
	header.query_size = htonl(size);
	PGRsend_cascade(&target, &header, (char *)cascade_data );
	if (cascade_data != NULL)
	{
		free(cascade_data);
	}
	return STATUS_OK;
}

/*
 * PGRcascade_main()
 *
 * Handle a cascade command packet received on `sock'.
 *
 * Only CMD_STS_TO_UPPER packets of type CMD_TYPE_ADD are acted upon:
 * the sender's entries are added below this server, the result is
 * announced to the upper server, and the resulting table is sent back
 * on `sock'.  Every other packet, including CMD_STS_TO_LOWER, is
 * ignored.
 *
 * Always returns STATUS_OK; the results of the individual steps are not
 * checked.
 */
int
PGRcascade_main(int sock, ReplicateHeader * header, char * query)
{
	if ((header->cmdSts == CMD_STS_TO_UPPER) &&
		(header->cmdType == CMD_TYPE_ADD))
	{
		PGR_Cascade = true;
		/* step 1: store the lower server's entries below myself */
		add_cascade_data(header,(ReplicateServerInfo*)query);
		/* step 2: pass the data upwards and take over the upper's answer */
		PGRstartup_cascade();
		/* step 3: answer the lower server with the new cascade data */
		notice_cascade_data(sock);
	}
	return STATUS_OK;
}

