/*   Implementation of UdmSearch's own file-based backend,
 *   which allows UdmSearch to be used without third-party libraries.
 */

#include "udm_config.h"

#ifdef HAVE_FILES
#include <stdio.h>
#if (WIN32|WINNT)
#else
#include <unistd.h>
#endif
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>

#include "udm_common.h"
#include "udm_spell.h"
#include "udm_db.h"
#include "udm_charset.h"
#include "udm_robots.h"
#include "udm_mutex.h"
#include "udm_utils.h"
#include "udm_log.h"
#include "udm_conf.h"
#include "udm_crc32.h"

/* Backend-wide initialisation hook: the file backend needs none, so this always returns 0. */
int UdmInitDB(){
	return 0;
}

/*
 * Return the backend version string "<VERSION>/UdmDB".
 * The string lives in a static buffer that is rewritten on every call,
 * so the caller must not free it.
 */
__INDLIB__ char * UdmVersion(){
static char udmver[128];
	/* Bounded formatting: an over-long VERSION is truncated instead of overflowing. */
	snprintf(udmver,sizeof(udmver),"%s/UdmDB",VERSION);
	return(udmver);
}

/* Search limits applied by UdmFind(); NULL means "no limit".
 * ul   - substring that a result URL must contain (set by UdmAddURLLimit).
 * ttag - tag value a result must carry (set by UdmAddTagLimit).
 */
static char * ul = NULL;
static char * ttag = NULL;

/*
 * "Escape" y into x. This backend needs no escaping, so the string is
 * copied verbatim. Returns x; x must be large enough to hold y.
 */
char * escstr(char *x,char *y){
	char *dst = x;

	strcpy(dst, y);
	return dst;
}


/************************* Backend functions *****************/
/* Number of slots in DB.crcdict. In multi-CRC mode InitDB opens slots 1..MAXMULTI-1
 * as files "dictNN"; in single-CRC mode only slot 0 is used.
 */
#define MAXMULTI	32
/* State of one connection to the file backend. */
typedef struct txt_db {
	int connected;		/* set to 1 by InitDB so the files are opened only once */
	FILE * dict;		/* text dictionary "dict.txt" (used when neither CRC mode is active) */
	FILE * url;		/* URL table "url.txt" */
	FILE * robots;		/* never opened in this file; only closed by CloseDB */
	FILE * stopwords;	/* never opened in this file; only closed by CloseDB */
	int  crcdict[MAXMULTI];	/* descriptors of the binary CRC dictionaries, -1 when not open */
	int  open_mode;		/* UDM_OPEN_MODE_* value given to UdmAllocDB */
	int  errcode;		/* 0, or 1 after a failure */
	char errmsg[1024];	/* text of the last error */
} DB;

static void CloseDB(void * vdb){
DB * db;
int i;
	db=(DB*)vdb;
	if(db->dict)fclose(db->dict);
	if(db->url)fclose(db->url);
	if(db->robots)fclose(db->robots);
	if(db->stopwords)fclose(db->stopwords);
	for(i=0;i<MAXMULTI;i++)
		if(db->crcdict[i]>=0)
			close(db->crcdict[i]);
}

/*
 * Release a handle obtained from UdmAllocDB: close its files if they
 * were opened, then free the handle itself. A NULL handle is ignored.
 */
void UdmFreeDB(void * vdb){
DB * db;
	db=(DB*)vdb;
	if(!db)return;
	if(db->connected)CloseDB(vdb);
	free(vdb);
}

/* Build "<UDM_VAR_DIR><UDMSLASH><name>" in buf.
 * Returns 0 on success; on overflow records an error in db and returns 1.
 */
static int txt_var_path(DB *db,char *buf,size_t size,const char *name){
	int n=snprintf(buf,size,"%s%c%s",UDM_VAR_DIR,UDMSLASH,name);
	if((n<0)||((size_t)n>=size)){
		snprintf(db->errmsg,sizeof(db->errmsg),"File name for '%s' is too long",name);
		db->errcode=1;
		return(1);
	}
	return(0);
}

/* Open one binary CRC dictionary into db->crcdict[slot]. Returns 0 on success, 1 on failure. */
static int txt_open_crc(DB *db,int slot,const char *name,int flags){
	char fname[UDMSTRSIZ];

	if(txt_var_path(db,fname,sizeof(fname),name))
		return(1);
#if (WIN32|WINNT)
	db->crcdict[slot]=open(fname,flags);
#else
	db->crcdict[slot]=open(fname,flags,0644);
#endif
	if(db->crcdict[slot]<0){
		snprintf(db->errmsg,sizeof(db->errmsg),"Can't open dict file '%s' (%s)", fname, strerror(errno));
		db->errcode=1;
		return(1);
	}
	return(0);
}

/* Open one text file; "what" names it in the error message. Returns NULL on failure. */
static FILE * txt_open_text(DB *db,const char *name,const char *fmode,const char *what){
	char fname[UDMSTRSIZ];
	FILE *f;

	if(txt_var_path(db,fname,sizeof(fname),name))
		return(NULL);
	f=fopen(fname,fmode);
	if(!f){
		snprintf(db->errmsg,sizeof(db->errmsg),"Can't open %s file '%s' (%s)", what, fname, strerror(errno));
		db->errcode=1;
	}
	return(f);
}

/* Close whatever a failed InitDB managed to open, leaving the handle clean. */
static void txt_undo_open(DB *db){
	int i;

	if(db->dict){
		fclose(db->dict);
		db->dict=NULL;
	}
	if(db->url){
		fclose(db->url);
		db->url=NULL;
	}
	for(i=0;i<MAXMULTI;i++){
		if(db->crcdict[i]>=0){
			close(db->crcdict[i]);
			db->crcdict[i]=-1;
		}
	}
}

/*
 * Open the backend files on first use.
 *
 * Which files are opened depends on the global DBMode:
 *   UDM_DBMODE_SINGLE_CRC - binary "dict" (slot 0) and "url.txt"
 *   UDM_DBMODE_MULTI_CRC  - binary "dict01".."dictNN" (slots 1..MAXMULTI-1) and "url.txt"
 *   anything else         - text "dict.txt" and "url.txt"
 * UDM_OPEN_MODE_READ opens read-only; any other mode creates/truncates.
 *
 * Returns 0 when the files are open (or were already), 1 on failure with
 * db->errmsg / db->errcode set. The handle is marked connected only when
 * every file was opened; after a failure everything opened so far is closed
 * again, so a later call retries instead of reporting success with missing
 * files.
 */
static int InitDB(DB *db){
const char * fmode;
int  flags;
int  i;
int  failed=0;

	if(db->connected)
		return(0);

	for(i=0;i<MAXMULTI;i++)
		db->crcdict[i]=-1;
	switch(db->open_mode){
		case UDM_OPEN_MODE_READ:
			fmode="r";
			flags=O_RDONLY;
			break;
		case UDM_OPEN_MODE_WRITE:
		default:
			fmode="w";
			flags=O_WRONLY|O_CREAT|O_TRUNC;
	}

	if(DBMode==UDM_DBMODE_SINGLE_CRC){
		failed=txt_open_crc(db,0,"dict",flags);
	}else
	if(DBMode==UDM_DBMODE_MULTI_CRC){
		char name[16];
		/* Slot 0 is unused in this mode. */
		for(i=1;(i<MAXMULTI)&&(!failed);i++){
			snprintf(name,sizeof(name),"dict%02d",i);
			failed=txt_open_crc(db,i,name,flags);
		}
	}else{
		db->dict=txt_open_text(db,"dict.txt",fmode,"dict");
		failed=(db->dict==NULL);
	}

	/* Every mode shares the URL table. */
	if(!failed){
		db->url=txt_open_text(db,"url.txt",fmode,"URL");
		failed=(db->url==NULL);
	}

	if(failed){
		txt_undo_open(db);
		return(1);
	}
	db->connected=1;
	return(0);
}

void * UdmAllocDB(int mode){
DB * db;
	db=(DB*)malloc(sizeof(DB));
	db->dict=NULL;
	db->url=NULL;
	db->robots=NULL;
	db->stopwords=NULL;
	db->errcode=0;
	db->errmsg[0]=0;
	db->open_mode=mode;
	db->connected=0;
	return((void*)db);
}
char * UdmDBErrorMsg(void *db){
	return(((DB*)db)->errmsg);
}
int UdmDBErrorCode(void *db){
	return(((DB*)db)->errcode);
}


/*************  Words ****************************/
/* One record of a binary CRC dictionary file; UdmStoreWords writes arrays of
 * these with write() and UdmFind reads them back with read().
 */
typedef struct cstruct {
	int		url_id;		/* document the word occurs in */
	udmcrc32_t	wrd_id;		/* UdmStrCRC32() of the word */
	char		weight;		/* filled from the word's count */
} UDM_CRCWORD;
/* Not implemented by the file backend: nothing is deleted, 0 is returned. */
int UdmDeleteWordFromURL(UDM_INDEXER* Indexer,int url_id){
	(void)Indexer;
	(void)url_id;
	return 0;
}
int UdmStoreWords(UDM_INDEXER * Indexer, int url_id, int status){
	if (InitDB(((DB*)(Indexer->db)))){
		UdmLog(Indexer->handle, UDM_LOG_ERROR, "Error: %s", ((DB*)(Indexer->db))->errmsg);
		exit(1);
	}
	if(DBMode==UDM_DBMODE_SINGLE_CRC){
		int fd,i,bytes;
		UDM_CRCWORD * cw;
		if(Indexer->nwords){
			fd=((DB*)(Indexer->db))->crcdict[0];
			bytes=sizeof(UDM_CRCWORD)*Indexer->nwords;
			cw=(UDM_CRCWORD*)malloc(bytes);
			for(i=0;i<Indexer->nwords;i++){
				if(Indexer->Word[i].count){
					cw[i].url_id=url_id;
					cw[i].weight=Indexer->Word[i].count,
					cw[i].wrd_id=UdmStrCRC32(Indexer->Word[i].word);
				}
			}
			if((bytes!=write(fd,(void *)cw,bytes))){
				sprintf(((DB*)(Indexer->db))->errmsg,"Can't write to dict file (%s)", strerror(errno));
				((DB*)(Indexer->db))->errcode=1;
				return(1);
			}
			free(cw);
		}
	}else
	if(DBMode==UDM_DBMODE_MULTI_CRC){
		int fd,i,j,len,bytes;
		UDM_CRCWORD * cw;
		if(Indexer->nwords){
			bytes=sizeof(UDM_CRCWORD)*Indexer->nwords;
			cw=(UDM_CRCWORD*)malloc(bytes);
			for(len=1;len<MAXMULTI;len++){
				j=0;
				for(i=0;i<Indexer->nwords;i++)
				if((Indexer->Word[i].count)&&
				    len==strlen(Indexer->Word[i].word)){
					cw[j].url_id=url_id;
					cw[j].weight=Indexer->Word[i].count,
					cw[j].wrd_id=UdmStrCRC32(Indexer->Word[i].word);
					j++;
				}
				fd=((DB*)(Indexer->db))->crcdict[len];
				bytes=sizeof(UDM_CRCWORD)*j;
				if((bytes!=write(fd,(void *)cw,bytes))){
					sprintf(((DB*)(Indexer->db))->errmsg,"Can't write to dict file (%s)", strerror(errno));
					((DB*)(Indexer->db))->errcode=1;
					return(1);
				}
			}
			free(cw);
		}
	}else{
		FILE *f;
		int i;
		f=((DB*)(Indexer->db))->dict;
		for(i=0;i<Indexer->nwords;i++){
			if(Indexer->Word[i].count){
				fprintf(f,"%d\t%d\t%s\n",url_id,
					Indexer->Word[i].count,
					Indexer->Word[i].word);
			}
		}
	}
	return(IND_OK);
}

/************** stopwords **********************/
/* The file backend keeps no stopword list: nothing is loaded, 0 is returned. */
int UdmLoadStopList(void * db){
	(void)db;
	return 0;
}
/* No stopwords exist in the file backend, so no word ever matches: always NULL. */
UDM_STOPWORD * UdmIsStopWord(char *word){
	(void)word;
	return NULL;
}



/************ URLs stuff *******************************/

/* In-memory URL queue: UdmAddURL appends to it, UdmGetDocInfo hands out
 * the entries not yet marked as indexed. n_urls is the number of entries.
 */
static int n_urls=0;
static UDM_DOCUMENT* URLs=NULL;


/*
 * Hand out the next queued URL that has not been indexed yet.
 *
 * Returns a newly allocated UDM_DOCUMENT (with its own copies of the URL
 * and of the "0" last-index time), or NULL when the queue holds no
 * unindexed URL or memory runs out. The queue entry is marked as indexed
 * only after the allocation succeeded, so an out-of-memory condition
 * does not lose the URL.
 */
UDM_DOCUMENT * UdmGetDocInfo(UDM_INDEXER* Indexer,int expired_first){
int i;

UDM_DOCUMENT * Result;

	for(i=0;i<n_urls;i++){
		if(URLs[i].indexed)
			continue;

		/* calloc: fields not listed below start out zeroed rather than undefined */
		Result=(UDM_DOCUMENT *)calloc(1,sizeof(UDM_DOCUMENT));
		if(!Result)
			return(NULL);
		Result->url=strdup(URLs[i].url);
		Result->last_index_time=strdup("0");
		if((!Result->url)||(!Result->last_index_time)){
			free(Result->url);
			free(Result->last_index_time);
			free(Result);
			return(NULL);
		}
		URLs[i].indexed=1;
		Result->content_type=NULL;
		Result->title=NULL;
		Result->text=NULL;
		Result->next_index_time=NULL;
		Result->keywords=NULL;
		Result->description=NULL;
		Result->url_id=URLs[i].url_id;
		Result->size=0;
		Result->status=0;
		Result->hops=URLs[i].hops;
		Result->crc32=0;
		Result->last_mod_time=0;
		return(Result);
	}
	return(NULL);
}
/*
 * Append a URL to the in-memory queue; its url_id is its 1-based position.
 *
 * Returns IND_OK on success. When url is NULL or memory runs out the queue
 * is left untouched and a non-IND_OK code is returned (IND_ERROR when the
 * project headers define it as a macro).
 */
int UdmAddURL(UDM_INDEXER* Indexer,char *url,int referrer,int hops, char * msg_id){
UDM_DOCUMENT *grown;
char *copy;
int rc;

#ifdef IND_ERROR
	rc=IND_ERROR;
#else
	rc=!IND_OK;
#endif
	if(!url)
		return(rc);
	copy=strdup(url);
	if(!copy)
		return(rc);
	/* realloc(NULL,...) acts like malloc, so the first insertion needs no special case;
	 * the result goes through a temporary so the old array survives a failure. */
	grown=(UDM_DOCUMENT *)realloc(URLs,sizeof(UDM_DOCUMENT)*(n_urls+1));
	if(!grown){
		free(copy);
		return(rc);
	}
	URLs=grown;
	URLs[n_urls].url_id=n_urls+1;
	URLs[n_urls].indexed=0;
	URLs[n_urls].hops=hops;
	URLs[n_urls].referrer=referrer;
	URLs[n_urls].url=copy;
	n_urls++;
	return(IND_OK);
}
/* Not implemented by the file backend: nothing is deleted, IND_OK is returned. */
int UdmDeleteUrl(UDM_INDEXER* Indexer,int url_id){
	(void)Indexer;
	(void)url_id;
	return IND_OK;
}
/* Not implemented by the file backend: nothing is updated, IND_OK is returned. */
int UdmUpdateUrl(UDM_INDEXER* Indexer,int url_id,int status,int period){
	(void)Indexer;
	(void)url_id;
	(void)status;
	(void)period;
	return IND_OK;
}

/* Printable form of an optional string: "-" for NULL or empty, the string itself otherwise. */
#define XVAL(x)		(!(x)?"-":!(x)[0]?"-":(x))
/*
 * Append the record of document url_id to the URL table ("url.txt").
 *
 * The record is one tab-separated line:
 *   url_id, tag, status, size, content_type, lang, crc32, url,
 *   last_mod_time, text, title, keywords, description
 * with "-" standing in for missing or empty strings. The URL itself is
 * taken from the in-memory queue; when url_id is not in the queue nothing
 * is written. Parameters not listed above are not stored by this backend.
 *
 * Exits the process if the backend files cannot be opened (existing
 * behaviour); otherwise returns IND_OK.
 */
int UdmLongUpdateUrl(UDM_INDEXER* Indexer,
	int url_id,int status,int is_changed,
	int size,int period,char * tag,int index,
	time_t last_mod_time,
	char *text_escaped,
	char *title_escaped,
	char *content_type,
	char *keywords_escaped,
	char *descript_escaped,
	udmcrc32_t crc32,
	char *lang,
	char *category
#ifdef NEWS_EXT
	,
	char *hd_date,
	char *hd_subj,
	char *hd_from,
	char *hd_group,
	char *hd_ref,
	char *msg_id
#endif
	){
	
	int i;
	FILE *f;

	if (InitDB(((DB*)(Indexer->db)))){
	    UdmLog(Indexer->handle, UDM_LOG_ERROR, "Error: %s", ((DB*)(Indexer->db))->errmsg);
	    exit(1);
	}

	f=((DB*)(Indexer->db))->url;

	for(i=0;i<n_urls;i++){
		if(URLs[i].url_id!=url_id)
			continue;

		/* The casts make the arguments match their conversions whatever the
		 * platform's widths of time_t and udmcrc32_t are. */
		fprintf(f,"%d\t%s\t%d\t%d\t%s\t%s\t%lu\t%s\t%li\t%s\t%s\t%s\t%s\n",
			url_id,
			XVAL(tag),status,size,
			XVAL(content_type),
			XVAL(lang),
			(unsigned long)crc32,
			XVAL(URLs[i].url),
			(long)last_mod_time,
			XVAL(text_escaped),
			XVAL(title_escaped),
			XVAL(keywords_escaped),
			XVAL(descript_escaped));
		break;
	}
	return(IND_OK);
}



/************* Clones ***************************/
/* Clone detection is not implemented by the file backend: always returns 0. */
int UdmFindOrigin(UDM_INDEXER* Indexer, udmcrc32_t crc32, int size){
	(void)Indexer;
	(void)crc32;
	(void)size;
	return 0;
}

/* Not implemented by the file backend: nothing is stored, IND_OK is returned. */
int UdmUpdateClone(UDM_INDEXER* Indexer,int url_id,int status,int period,
	char *content_type,time_t last_mod_time,udmcrc32_t crc32){
	(void)Indexer;
	(void)url_id;
	(void)status;
	(void)period;
	(void)content_type;
	(void)last_mod_time;
	(void)crc32;
	return IND_OK;
}

/* Clone lists are not kept by the file backend: always returns NULL. */
UDM_DOCUMENT * UdmCloneList(void * db, udmcrc32_t crc32){
	(void)db;
	(void)crc32;
	return NULL;
}
/* Not implemented by the file backend: nothing is recorded, 0 is returned. */
int UdmRegisterChild(UDM_INDEXER *Indexer, int parent_id, int child_id){
	(void)Indexer;
	(void)parent_id;
	(void)child_id;
	return(0);
}
/* Message-ID lookup is not implemented by the file backend: always returns 0. */
int UdmFindMsgID(UDM_INDEXER *Indexer, const char * msg_id){
	(void)Indexer;
	(void)msg_id;
	return(0);
}



/************* Clearing **************************/
/* Not implemented by the file backend: the dictionary is left alone, IND_OK is returned. */
int UdmDeleteAllFromDict(UDM_INDEXER* Indexer){
	(void)Indexer;
	return IND_OK;
}
/* Not implemented by the file backend: the URL table is left alone, IND_OK is returned. */
int UdmDeleteAllFromUrl(UDM_INDEXER* Indexer){
	(void)Indexer;
	return IND_OK;
}
/* Not implemented by the file backend: nothing is cleared, IND_OK is returned. */
int UdmClearDB(UDM_INDEXER* Indexer){
	(void)Indexer;
	return IND_OK;
}



/************** Robots.txt stuff *****************/
/* The file backend stores no robots.txt data: nothing is loaded, IND_OK is returned. */
int UdmLoadRobots(UDM_INDEXER *Indexer){
	(void)Indexer;
	return IND_OK;
}
/* Not implemented by the file backend: no rule is removed, IND_OK is returned. */
int UdmDeleteRobotsFromHost(UDM_INDEXER* Indexer,char *hostinfo){
	(void)Indexer;
	(void)hostinfo;
	return IND_OK;
}
/*
 * Append one (hostinfo, path) rule to the global Robots array.
 *
 * The array is terminated by an entry whose hostinfo is NULL; both strings
 * are copied. The update runs under the UDM_LOCK_ROBOTS lock when a
 * LockProc is installed.
 *
 * Returns IND_OK on success. When memory runs out the array is left
 * unchanged and a non-IND_OK code is returned (IND_ERROR when the project
 * headers define it as a macro).
 */
int UdmAddRobotsToHost(UDM_INDEXER* Indexer,char *hostinfo,char *s){
int nrobots=0;
int rc;
char *host_copy;
char *path_copy;
UDM_ROBOT *grown;

#ifdef IND_ERROR
	rc=IND_ERROR;
#else
	rc=!IND_OK;
#endif
	/* Copy first: a NULL hostinfo stored in the array would act as a terminator. */
	host_copy=strdup(hostinfo);
	path_copy=strdup(s);
	if((!host_copy)||(!path_copy)){
		free(host_copy);
		free(path_copy);
		return(rc);
	}

	if(LockProc)LockProc(UDM_LOCK,UDM_LOCK_ROBOTS);
	if(Robots){
		while(Robots[nrobots].hostinfo)
			nrobots++;
	}
	/* Room for the new entry plus the terminator; realloc(NULL,...) acts like malloc. */
	grown=(UDM_ROBOT *)realloc(Robots,(nrobots+2)*sizeof(UDM_ROBOT));
	if(grown){
		Robots=grown;
		Robots[nrobots].hostinfo=host_copy;
		Robots[nrobots].path=path_copy;
		nrobots++;
		Robots[nrobots].hostinfo=Robots[nrobots].path=NULL;
		rc=IND_OK;
	}else{
		free(host_copy);
		free(path_copy);
	}
	if(LockProc)LockProc(UDM_UNLOCK,UDM_LOCK_ROBOTS);
	return(rc);
}



/**************** search stuff *******************/
/* One search hit; after grouping in UdmFind there is one per document. */
typedef struct wrd{
	int  url_id;	/* document id */
	int  count;	/* number of dictionary hits merged into this entry */
	int  weight;	/* sum of the weights of the merged hits */
} SEARCHWORD;

/* Function to sort words in weight order */
static int cmpword(const void *s1,const void *s2){
int res;
	if(!(res=(((SEARCHWORD*)s2)->count)-(((SEARCHWORD*)s1)->count)))
		if(!(res=(((SEARCHWORD*)s2)->weight-(((SEARCHWORD*)s1)->weight))))
			if(!(res=(((SEARCHWORD*)s1)->url_id-(((SEARCHWORD*)s2)->url_id))));
	return(res);
}
/* Function to sort word list in URL_ID order */
static int cmpurlid(const void *s1,const void *s2){
	return(((SEARCHWORD*)s2)->url_id-((SEARCHWORD*)s1)->url_id);
}


UDM_DOCUMENT * UdmFind(void * db,char *q,int np,int ps, int search_mode,int sort_order,char *wordinfo,int *found){
char *w,*lasttok;
char *words[100];
udmcrc32_t cwords[100];
char str[UDMSTRSIZ];
char curword[100];
int weight;
int url_id;
int num_words=0;
int i,j;
SEARCHWORD * wrd=NULL;
int nwrd=0;
UDM_DOCUMENT * r;
int first;
int skip=0;

	if(InitDB(db))
		return(NULL);

	UdmLoadStopList(db);
	*wordinfo=0;
	if(UdmDBErrorCode(db))return(NULL);
	w=UdmGetWord(q,&lasttok,UdmGetDefaultCharset());
	while(w){
		char ** ww;
		char *rw;

		ww=UdmNormalizeWord(w);
		rw=ww?*ww:w;

		if(UdmIsStopWord(rw)){
			sprintf(UDM_STREND(wordinfo)," %s :stopword",rw);
			w=UdmGetWord(NULL,&lasttok,UdmGetDefaultCharset());
			continue;
		}
		if(num_words<100){
			words[num_words]=strdup(rw);
			cwords[num_words]=UdmStrCRC32(rw);
			num_words++;
		}
		w=UdmGetWord(NULL,&lasttok,UdmGetDefaultCharset());
	}

	if(DBMode==UDM_DBMODE_SINGLE_CRC){
		UDM_CRCWORD cw[256];
		int bytes, wnum;
		while((bytes=read(((DB*)db)->crcdict[0],&cw,sizeof(cw)))){
			wnum=bytes/sizeof(UDM_CRCWORD);
			for(j=0;j<wnum;j++){
				for(i=0;i<num_words;i++){
					if(cwords[i]==cw[j].wrd_id){
						if(!nwrd){
							nwrd=1;
							wrd=(SEARCHWORD*)malloc(nwrd*sizeof(SEARCHWORD));
						}else{
							nwrd++;
							wrd=(SEARCHWORD*)realloc(wrd,nwrd*sizeof(SEARCHWORD));
						}
						wrd[nwrd-1].url_id=cw[j].url_id;
						wrd[nwrd-1].count=1;
						wrd[nwrd-1].weight=cw[j].weight;
					}
				}
			}
		}
	}else
	if(DBMode==UDM_DBMODE_MULTI_CRC){
		UDM_CRCWORD cw[256];
		int i,j,bytes,len,fd,wnum;
		
		for(i=0;i<num_words;i++){
			len=strlen(words[i]);
			if(len>=MAXMULTI)len=MAXMULTI-1;
			fd=((DB*)db)->crcdict[len];
			lseek(fd,0,SEEK_SET);
			while((bytes=read(fd,&cw,sizeof(cw)))){
				wnum=bytes/sizeof(UDM_CRCWORD);
				for(j=0;j<wnum;j++){
					if(cwords[i]==cw[j].wrd_id){
						if(!nwrd){
							nwrd=1;
							wrd=(SEARCHWORD*)malloc(nwrd*sizeof(SEARCHWORD));
						}else{
							nwrd++;
							wrd=(SEARCHWORD*)realloc(wrd,nwrd*sizeof(SEARCHWORD));
						}
						wrd[nwrd-1].url_id=cw[j].url_id;
						wrd[nwrd-1].count=1;
						wrd[nwrd-1].weight=cw[j].weight;
					}
				}
			}
		}
	}else{
		/* UDM_DBMODE_SINGLE */
		while(fgets(str,sizeof(str),((DB*)db)->dict)){
			w=UdmGetToken(str,"\t",&lasttok);
			if(!w)continue;url_id=atoi(w);
			w=UdmGetToken(NULL,"\t",&lasttok);
			if(!w)continue;weight=atoi(w);
			w=UdmGetToken(NULL,"\t\n",&lasttok);
			if(!w)continue;strcpy(curword,w);
			for(i=0;i<num_words;i++){
				if(!strcmp(curword,words[i])){
					if(!nwrd){
						nwrd=1;
						wrd=(SEARCHWORD*)malloc(nwrd*sizeof(SEARCHWORD));
					}else{
						nwrd++;
						wrd=(SEARCHWORD*)realloc(wrd,nwrd*sizeof(SEARCHWORD));
					}
					wrd[nwrd-1].url_id=url_id;
					wrd[nwrd-1].count=1;
					wrd[nwrd-1].weight=weight;
				}
			}
		}
	}	
	/* Now let's sort in url_id order then group results */
	if(!nwrd)return(0);
	qsort((void*)wrd,nwrd,sizeof(SEARCHWORD),cmpurlid);

	j=0;
	for(i=1;i<nwrd;i++){
		if(wrd[j].url_id==wrd[i].url_id){
			wrd[j].weight+=wrd[i].weight;
			wrd[j].count++;
		}else{
			j++;
			wrd[j].url_id=wrd[i].url_id;
			wrd[j].count=wrd[i].count;
			wrd[j].weight=wrd[i].weight;
		}
	}
	nwrd=j+1;

	/* Sort groupped by url_id results in weight order */
	qsort((void*)wrd,nwrd,sizeof(SEARCHWORD),cmpword);

	if(search_mode==UDM_MOD_ALL){
		for(i=0;i<nwrd;i++){
			if(wrd[i].count<num_words){
				nwrd=i;
				break;
			}
			
		}
	}

	if(!nwrd){
		free(wrd);
		return(0);
	}
	
	r=(UDM_DOCUMENT*)malloc(sizeof(UDM_DOCUMENT)*(nwrd+1));
	for(i=0;i<nwrd;i++){
		r[i].url_id=wrd[i].url_id;
		r[i].rating=wrd[i].count;
		r[i].content_type=NULL;
		r[i].crc32=0;
		r[i].url=NULL;
		r[i].last_mod_time=0;
		r[i].text=NULL;
		r[i].title=NULL;
		r[i].keywords=NULL;
		r[i].description=NULL;
	}
	free(wrd);
	r[nwrd].url_id=0;

	while(fgets(str,sizeof(str),((DB*)db)->url)){
		url_id=atoi(str);
		for(i=0;i<nwrd;i++){
			if(url_id==r[i].url_id){
				char * tag=NULL;
				int matched=1;
				w=UdmGetToken(str,"\t",&lasttok);
				for(j=0;w;j++){
					switch(j){
						case  1: tag=strdup(w);break;
						case  3: r[i].size=atoi(w);break;
						case  4: r[i].content_type=strdup(w);break;
						case  6: r[i].crc32=(udmcrc32_t)strtol(w, NULL, 10);break;
						case  7: r[i].url=strdup(w);break;
						case  8: r[i].last_mod_time=atol(w);break;
						case  9: r[i].text=strdup(w);break;
						case 10: r[i].title=strdup(w);break;
						case 11: r[i].keywords=strdup(w);break;
						case 12: r[i].description=strdup(w);break;
					}
					w=UdmGetToken(NULL,"\t",&lasttok);
				}

				/* Check "ul" subsection match  */
				if(ul)matched=(strstr(r[i].url,ul)!=NULL);
				/* Check "tag" subsection match */
				if((matched)&&(ttag)&&(tag))matched=(!strcmp(ttag,tag));

				if(!matched){
					/* Skip this URL */
					UDM_FREE(r[i].content_type);
					UDM_FREE(r[i].url);
					UDM_FREE(r[i].text);
					UDM_FREE(r[i].title);
					UDM_FREE(r[i].keywords);
					UDM_FREE(r[i].description);
					skip++;
					memmove(r+i,r+i+1,(nwrd-i)*sizeof(UDM_DOCUMENT));
				}
			}
		}
	}
	*found=nwrd-skip;
	if(!*found){
		free(r);
		return(NULL);
	}
	first=np*ps;
	if(first>nwrd-skip)first=nwrd-skip;
	if(first>0)memmove(r,r+first,(nwrd-skip-first+1)*sizeof(UDM_DOCUMENT));
	if((nwrd-skip-first)>ps)r[ps].url_id=0;
	return(r);
}



/***************** Misc functions ***************/

/* Spell data is not stored by the file backend: nothing is inserted, 0 is returned. */
int UdmInsertSpell(UDM_INDEXER *Indexer,char *flag,char *lang,char *word){
	(void)Indexer;
	(void)flag;
	(void)lang;
	(void)word;
	return 0;
}

/* Spell data is not stored by the file backend: no word is ever found, NULL is returned. */
UDM_SPELL * UdmFindWordDB(char *word) {
	(void)word;
	return NULL;
}

/* Affixes are not stored by the file backend: nothing is inserted, 0 is returned. */
int UdmInsertAffix(UDM_INDEXER *Indexer,char flag,char *lang,char *mask,char *find,char *repl,char *type){
	(void)Indexer;
	(void)flag;
	(void)lang;
	(void)mask;
	(void)find;
	(void)repl;
	(void)type;
	return 0;
}

/* Affixes are not stored by the file backend: nothing is imported, 0 is returned. */
int UdmDBImportAffixes(void *db){
	(void)db;
	return 0;
}

/* Categories are not supported by the file backend: always returns NULL. */
UDM_CATEGORY * UdmCatPath(void * db,char * category){
	(void)db;
	(void)category;
	return NULL;
}

/* Categories are not supported by the file backend: always returns NULL. */
UDM_CATEGORY * UdmCatList(void * db,char * category){
	(void)db;
	(void)category;
	return NULL;
}

/* Statistics are not implemented by the file backend: nothing is reported, IND_OK is returned. */
__INDLIB__ int  UdmGetStatistics(){
	return IND_OK;
}
/* Not implemented by the file backend: nothing is marked, IND_OK is returned. */
int UdmMarkForReindex(UDM_INDEXER* Indexer){
	(void)Indexer;
	return IND_OK;
}
/*
 * Restrict searches to documents carrying the given tag.
 * The tag is copied; a previously set tag is released first, and a NULL
 * tag removes the limit. Always returns 0.
 */
__INDLIB__ int UdmAddTagLimit(char * tag){
	free(ttag);	/* free(NULL) is a no-op */
	ttag=tag?strdup(tag):NULL;
	return(0);
}
/* Status limits are not supported by the file backend: the value is ignored, 0 is returned. */
__INDLIB__ int UdmAddStatusLimit(int status){
	(void)status;
	return 0;
}
/*
 * Restrict searches to documents whose URL contains the given substring.
 * The previous limit is released and replaced by a copy of URL. Always returns 0.
 */
__INDLIB__ int UdmAddURLLimit(char *URL){
	char *copy;

	UDM_FREE(ul);
	copy=strdup(URL);
	ul=copy;
	return 0;
}
/* Language limits are not supported by the file backend: the argument is ignored, 0 is returned. */
__INDLIB__ int UdmAddLangLimit(char *URL){
	(void)URL;
	return 0;
}

/* Category limits are not supported by the file backend: the argument is ignored, 0 is returned. */
__INDLIB__ int UdmAddCatLimit(char * cat){
	(void)cat;
	return 0;
}

/* Release both the URL limit and the tag limit. Always returns 0. */
__INDLIB__ int UdmClearURLLimit(){
	UDM_FREE(ttag);
	UDM_FREE(ul);
	return 0;
}
__INDLIB__ int UdmClearLimits(){
	UDM_FREE(ul)
	return(0);
}
/* Referer reporting is not implemented by the file backend: IND_OK is returned. */
__INDLIB__ int UdmGetReferers(UDM_INDEXER* Indexer){
	(void)Indexer;
	return IND_OK;
}
/* Time limits are not supported by the file backend: the argument is ignored, 0 is returned. */
__INDLIB__ int UdmAddTimeLimit(struct udm_stl_info_t * stl){
	(void)stl;
	return(0);
}

#endif
