/*  VER 229   TAB P   $Id: getarticle.c,v 1.26.2.1 2001/02/14 06:47:31 egil Exp $
 *
 *  fetch articles via an NNTP server
 *
 *  all NNTP requests are placed in a request queue.
 *  when the response arrives, it is taken care of by a 
 *  function called handle_xxx()
 *
 *  copyright 1996, 1997, 1998 Egil Kvaleberg, egil@kvaleberg.no
 *  the GNU General Public License applies
 *
 *  $Log: getarticle.c,v $
 *  Revision 1.26.2.1  2001/02/14 06:47:31  egil
 *  Compiler warning
 *
 *  Revision 1.26  1999/04/18 09:28:45  src
 *  Fix for --tag with --no-path
 *
 *  Revision 1.25  1999/04/07 07:10:55  src
 *  Implemented --tag
 *
 *  Revision 1.24  1999/03/31 05:53:45  src
 *  Seperated MAXHEADERSIZE from NNTP_STRLEN
 *
 *  Revision 1.23  1999/03/24 03:52:58  src
 *  Implemented "newsx" as magic exclude pattern
 *
 *  Revision 1.22  1999/03/18 05:41:19  src
 *  Version 1.4pre
 *
 *  Revision 1.21  1999/03/16 17:35:38  src
 *  Version 1.4pre
 *
 *  Revision 1.20  1999/03/15 08:51:35  src
 *  Nothing.
 *
 *  Revision 1.19  1999/03/11 07:30:00  src
 *  Implemented check for spool free space
 *
 *  Revision 1.18  1999/03/07 14:58:18  src
 *  Read newsconfig supported. Storage API supported.
 *
 *  Revision 1.17  1999/03/04 17:19:27  src
 *  Fix for removed groups that weren't
 *
 *  Revision 1.16  1998/11/22 08:23:07  src
 *  Added --forget-inactive
 *
 *  Revision 1.15  1998/11/21 19:14:23  src
 *  Added --filter option
 *
 *  Revision 1.14  1998/10/25 13:50:10  src
 *  STAT fix from Steinar Haug, sthaug@nethelp.no
 *
 *  Revision 1.13  1998/10/23 03:37:42  src
 *  Path line error report fixed, reported by Uli Zappe
 *
 *  Revision 1.12  1998/09/21 10:04:29  src
 *  Added new command line options for --inews
 *
 *  Revision 1.11  1998/09/11 09:17:42  src
 *  Check path consistency (--no-path) and length (--max-path)
 *  GNU style option --help, --version, --dry-run, changed --noxx to --no-xx
 *  Check for putenv and setenv, added xstrcpy
 *
 *  Revision 1.10  1998/09/09 07:32:11  src
 *  Version 1.1
 *
 *  Revision 1.9  1998/09/03 02:49:29  src
 *  Fixed stuff detected by -Wall
 *
 *  Revision 1.8  1998/07/12 09:39:28  src
 *  newsx version 1.0
 */

#include "common.h"
#include "proto.h"
#include "options.h"
#include "statistics.h"
#include "newsconfig.h"
#include "news.h"
#include "nntp.h"

long bytes_in_spool; /* external: number of bytes read */
long latest_where;   /* external: last article actually processed */
long latest_next;    /* latest response from a NEXT, -1 if none */
int next_at_witts_end; /* NEXT has reached the end */
int stat_failures;   /* STAT failures in a row */

static int next_requests;   /* how many outstanding NEXTs */
static int latest_contains_msgnum; /* latest request contained a message number */

/* 
 *  BUG: this can be a real memory hog
 *  BUG: perhaps we should have had a maxlimit here...
 */
char *temp_store = 0;
long temp_len = 0;
#define TEMP_STEP 100000 /* BUG: tuneable? */

static int no_stat = 0; /* for hosts that lack a STAT */

/*
 *  see if message-ID is fake
 *
 *  msgid may be preceded by white space, which is skipped
 *  returns the result of is_tag() for the prefix "<fake@"
 */
static int
is_fake(char *msgid)
{
    /*
     *  the cast is required: msgid holds data received from the
     *  server, and handing a negative plain char to isspace()
     *  is undefined behaviour
     */
    while (*msgid && isspace((unsigned char)*msgid)) ++msgid;
    return is_tag(msgid,"<fake@");
}

/*
 *  read an article proper (header and body, up to the terminating
 *  "." line) and feed it to the spool
 *
 *  where, group: used for error reports; where is also formatted
 *                as the id handed to filter_reject()
 *
 *  the article is collected in temp_store with line ends normalised
 *  to a single '\n', and is passed to write_incoming() only if it
 *  was not rejected as fake, not rejected by the filter, and its
 *  Path: was acceptable (or --no-path is in effect)
 *
 *  return 1 if the article was stored
 *  return 0 otherwise: timeout, out of memory, article rejected,
 *  suspect path, or write_incoming() failed
 */
static int 
read_article(long where,char *group)
{
    char line[MAXHEADERSIZE+1];
    char path_line[MAXHEADERSIZE+1];
    int reject = 0;     /* >0: fake article, <0: Path: could not be extended */
    /*
     *  true when the previous chunk ended in '\n', i.e. the current
     *  chunk starts a new line (not called `newline', since that
     *  name is used for the line terminator string in requests
     *  elsewhere in this file)
     */
    int eol_seen = 1;
    int header = 1;     /* still inside the header part */
    int path_ok = -1;   /* -1 until a Path: line has been seen */
    long len;
    long bytecount = 0L; /* bytes collected in temp_store so far */

    line[MAXHEADERSIZE] = '\0'; /* better safe than sorry */
    path_line[0] = '\0';

    /* fetch the article, header and body */
    for (;;) {
	if (!get_server_msg(line, MAXHEADERSIZE)) {
	    /* timeout: simply give up */
	    return 0;
	}
	if (filter) filter_line(line); /* send to filter too */
	len = strlen(line);
	gross_bytecount += len;

	/* end of file */
	if (eol_seen && line[0]=='.' && (line[1]=='\r' || line[1]=='\n')) break;

	/* process header part */
	if (header && eol_seen && len > 0) {
	    switch (line[0]) {
	    case '\r':
	    case '\n':
		/* empty line: the body starts here */
		header = 0;
		break;
	    case 'M':         /* 12345678901 */
		if (is_tag(line,"Message-ID:")) {
		    if (!keep_fake && is_fake(line+11)) {
			/* Dnews style faked message */
			reject = 1;
		    }
		}
		break;
	    case 'P':
		if (is_tag(line,"Path:")) {
		    char *path = skipsp(line+5);
		    int add_tag = tag_opt;
		    /* BUG: what about veeeery long lines - point into buffer instead */
		    /* BUG: or can there really be long lines? */

		    /* fix by Stefan Huelswitt <huels@iname.com> */
		    /*
		     *  save for error reporting et al.
		     *  path points into line[], and path_line[] is the
		     *  same size, so the copy cannot overflow
		     */
		    strcpy(path_line,path);

		    if (nopath_opt) {
			/* ignore path check */
			path_ok = 1;
		    } else if ((path_ok = traverse_path(path)) != 1) {
			/* path did not match */

			if (newsx_path) {
			    add_tag = 1;
			}
		    }

		    if (add_tag) {
			if (len < MAXHEADERSIZE-(2+strlen(NEWSX_PATH))) {
			    /* save the day by inserting "magic" path */
			    sprintf(line,"Path: %s!%s",NEWSX_PATH,
							path_line);
			    len = strlen(line);

			    log_msg(L_DEBUGMORE,"inserting %s path",
								NEWSX_PATH);
			    if (newsx_path) path_ok = 1;
			} else {
			    log_msg(L_ERR,
			    "No room to extend path in ARTICLE %ld, adjust MAXHEADERSIZE",
								where);
			    reject = -1;
			}
		    }
		}
		break;
	    default:
		break;
	    }
	}

	/* find and strip newlines */
	eol_seen = 0;
	while (len > 0 && (line[len-1]=='\r' || line[len-1]=='\n')) {
	    /* fix by: "J. Richard Sladkey" <jrs@foliage.com> */
	    if (line[len-1]=='\n') eol_seen=1;
	    --len;
	}

	if (bytecount+len+eol_seen > temp_len) {
	    /*
	     *  there is not enough room: grow in TEMP_STEP increments
	     *  until this chunk fits
	     *  the result goes via a temporary, so that a failed
	     *  realloc() leaves temp_store/temp_len describing the
	     *  old, still valid, buffer
	     */
	    long wanted = temp_len;
	    char *grown;

	    do {
		wanted += TEMP_STEP;
	    } while (bytecount+len+eol_seen > wanted);

	    grown = temp_store ? realloc(temp_store,wanted)
			       : malloc(wanted);
	    if (!grown) {
		log_msg(L_ERR,"out of memory");
		return 0;
	    }
	    temp_store = grown;
	    temp_len = wanted;
	}

	if (len > 0) memcpy(temp_store+bytecount,line,len);
	bytecount += len;
	if (eol_seen) temp_store[bytecount++] = '\n';
    }

    /* reject article? */
    if (reject) {
	if (reject > 0) {
	    log_msg(L_DEBUG,"ARTICLE %ld rejected as fake",where);
	    ++fake_articles;
	}
	return 0;
    }

    /* get status of filtering */
    if (filter) {
	/* room for any long, sign and terminator included */
	char id[32];
	sprintf(id,"%ld",where);
	if (filter_reject(id)) {
	    log_msg(L_DEBUG,"ARTICLE %ld rejected by full filter",where);
	    ++filter_articles;
	    return 0;
	}
    }

    /* see if the path is suspect */
    if (!nopath_opt && path_ok != 1) {
	if (path_line[0]) {
	    /* specified Path is wrong */
	    log_msg(L_ERR,"ARTICLE %ld in %s did not contain '%s' in 'Path:'",
				   where, group,            get_exclusion());
	    log_msg(L_ERR,"Path line was '%s'", path_line);
	} else {
	    /* no Path specified */
	    if (!add_header || strncmp(add_header,"Path",4)!=0) {
		log_msg(L_ERR,"ARTICLE %ld in %s did not contain a 'Path:'", 
				       where, group);
	    }
	}
	return 0;
    }

    /* last argument: whether a Path: line was seen at all */
    if (!write_incoming(temp_store,bytecount,(path_ok >= 0))) return 0;

    bytes_in_spool += bytecount;
    net_bytecount += bytecount;
    ++fetched_articles;

    return 1;
}

/*
 *  fetch current article proper
 *  return false if no point in continuing
 */
static int 
handle_article(char *status, long where, char *group)
{
    int ok;
    long a;
    char *endptr;

    switch (strtoul(status,&endptr,10)) {
    case OK_ARTICLE:                    /* article OK */
	/* verify article number against outstanding requests... */
	a = strtoul(endptr,&endptr,10); /* verify article number */
	if (a != where) {
	    if (a==0 && window <= 1) {
		/* OK, we'll allow it - presumably a non-conforming server */
		static int told = 0;
		if (!told) {
		    log_msg(L_INFO,"no article number in ARTICLE response: %s",
								       status);
		    told = 1;
		}
	    } else {
		log_msg(L_ERR,"ARTICLE %ld in %s out of phase: %s",
					where, group, status);
		return 0;
	    }
	}
	if ((ok = read_article(where,group))) {
	    /* article is presumably OK */
	    latest_where = where;

	    /* show that something is happening */
	    if (debug_opt == 2) show_activity();
	}
	return ok;

    case ERR_NOARTIG:                   /* no such article in group */
    case ERR_NOART:                     /* no such article */
	/* BUG: message does not contain an article number... */
	/* article has disappeared, ignore it */
	log_msg(L_DEBUG,"article in %s disappeared: %s",group,status);
	latest_where = where;

	return 1;

    default:                            /* otherwise, protocol error */
	log_msg(L_ERR,"NNTP article read error: got \"%s\"", status);
	/* stop here */
	return 0;
    }
}

/*
 *  queue an ARTICLE request for article number where
 *  the response will be taken care of by handle_article()
 *
 *  group is passed on for error reporting
 *  return the result of put_request()
 */
static int 
current_article(long where,char *group)
{
    char cmd[NNTP_STRLEN+1];
    int queued;

    /*
     *  when no_stat is set, fetch_article() returns before its own
     *  progtitle2() call, so the title is updated here instead
     */
    if (no_stat) {
	sprintf(cmd, " %ld", where);
	progtitle2(group, cmd);
    }

    /* the buffer is reused for the request proper */
    latest_contains_msgnum = 1;
    sprintf(cmd, "ARTICLE %ld%s", where, newline);
    queued = put_request(cmd,handle_article,where,group);
    return queued;
}

/*
 *  process message ID from STAT or NEXT
 *  either by doing nothing, or by requesting an ARTICLE
 *
 *  the checks are made in this order, the first hit wins:
 *  fake ID, found in history, seen already, message ID filter,
 *  header filter
 *  a hit is counted, latest_where is moved to where, and 1 is returned
 *  otherwise the result of current_article() is returned
 */
static int 
process_msgid(char *msgid,long where, char *group)
{
    int wanted = 0;

    if (!keep_fake && is_fake(msgid)) {
	/* fake message */
	log_msg(L_DEBUGMORE,"MSGID %s is fake",msgid);
	++fake_articles;
    } else if ((!cfg_history || cfg_history[0]) && history_lookup(msgid)) {
	/*
	 *  already in news history database
	 *  (not consulted if cfg_history is an empty string)
	 */
	++history_articles;
    } else if (!new_msgid(msgid)) {
	/* read already */
	++already_articles;
    } else if (do_mfilter(msgid)) {
	/* triggered by message ID filter */
	log_msg(L_DEBUGMORE,"MSGID %s filtered by message ID",msgid);
	++filter_articles;
    } else if (do_filter(msgid,group)) {
	log_msg(L_DEBUGMORE,"MSGID %s filtered by header",msgid);
	++filter_articles;
    } else {
	/* BUG: have max file size?? */
	wanted = 1;
    }

    if (!wanted) {
	/* nothing to fetch, this article number is done with */
	latest_where = where;
	return 1;
    }

    log_msg(L_DEBUGMORE,"fetching article %ld",where);
    return current_article(where,group);
}

/*
 *  handle article STAT result  
 */
static int 
handle_stat(char *status,long where,char *group)
{
    char msgid[NNTP_STRLEN+1];
    long a;
    char *endptr;

    switch (strtoul(status,&endptr,10)) {
    case OK_NOTEXT:                     /* follows STAT... */
	/* 223 3800 <jeqk9rzgqa4.fsf@storm.stud.ntnu.no> status */
	if (sscanf(endptr,"%ld %[^ \n\t]",&a,msgid) != 2) {
	    log_msg(L_ERR,"bad STAT reponse: %s", status);
	    return 0;
	}
	if (a != where) {
	    log_msg(L_ERR,"STAT %ld out of phase: %s", where, status);
	    return 0;
	}
	stat_failures = 0;

	return process_msgid(msgid,a,group);

    case ERR_COMMAND:                   /* STAT is not implemented */
	log_msg(L_INFO,"server lacks STAT command: %s",status);
	no_stat = 1;
    case 99: /* dummy if no STAT command */
	log_msg(L_DEBUGMORE,"unconditionally fetching article %ld",where);
	return current_article(where,group);

    case ERR_NOARTIG:                   /* no such article in group */
    case ERR_NOART:                     /* no such article */
	/* article no longer there, ignore it */
	++stat_failures;
	log_msg(L_DEBUGMORE,"article %ld in %s not on server, stat_failures %d",
				     where,group,stat_failures);
	return 1;

    default:                            /* otherwise, protocol error */
	log_msg(L_ERR,"NNTP unknown STAT error: got \"%s\"", status);
	/* stop here */
	return 0;
    }
}

/*
 *  handle article NEXT result
 *
 *  status: response line from the server
 *  where:  article number the request was queued for
 *  group:  group name, for reports and filtering
 *
 *  one outstanding NEXT is always accounted for, whatever the response
 *  return false if no point in continuing
 */
static int 
handle_next(char *status,long where,char *group)
{
    char buf[NNTP_STRLEN+1];
    long artnum;
    char *tail;
    unsigned long code;

    --next_requests;
    code = strtoul(status,&tail,10);

    if (code == OK_NOTEXT) {
	/* follows NEXT.. */
	/* 223 3800 <jeqk9rzgqa4.fsf@storm.stud.ntnu.no> */
	if (sscanf(tail,"%ld %[^ \n\t]",&artnum,buf) == 2) {
	    latest_next = artnum;
	    return process_msgid(buf,artnum,group);
	}

	log_msg(L_ERR,"bad NEXT reponse: %s", status);
	/*
	 *  try to recover in a fashion: give up on NEXT and ask
	 *  with a STAT instead (the buffer is reused for the request)
	 */
	no_next = 1;
	sprintf(buf, "STAT %ld%s", where,newline);
	latest_contains_msgnum = 1;
	return put_request(buf,handle_stat,where,group);
    }

    if (code == ERR_COMMAND) {
	/* NEXT is not implemented */
	log_msg(L_INFO,"server lacks NEXT command: %s",status);
	no_next = 1;
	log_msg(L_DEBUGMORE,"unconditionally fetching article %ld",where);
	return current_article(where,group);
    }

    if (code == ERR_NONEXT) {
	/* no next article */
	next_at_witts_end = 1;
	log_msg(L_DEBUGMORE,"NEXT reports no more articles in %s",group);
	return 1;
    }

    if (code == ERR_NOARTIG || code == ERR_NOART) {
	/* no such article (in group): treated as the end, too */
	next_at_witts_end = 1;
	log_msg(L_ERR,"NNTP unfamiliar NEXT reponse: got \"%s\"", status);
	return 1;
    }

    /* otherwise, protocol error: stop here */
    log_msg(L_ERR,"NNTP unknown NEXT error: got \"%s\"", status);
    return 0;
}

/*
 *  fetch an article in current group
 *  the group name is just for error reporting and filtering
 *
 *  where: article number to consider
 *  first: nonzero for the first call in a group; resets the
 *         NEXT/STAT bookkeeping kept in this file
 *
 *  nothing is read here: a STAT (or possibly a NEXT) request is
 *  queued, and the response is taken care of by handle_stat() or
 *  handle_next(). if the server is known to lack STAT, handle_stat()
 *  is called directly with a dummy status, so that an ARTICLE
 *  request is queued
 *
 *  return false if no point in continuing
 */
int 
fetch_article(long where,int first,char *group)
{
    char request[NNTP_STRLEN+1];
    char status[NNTP_STRLEN+1]; /* only used for the dummy STAT response */

    if (first) {
	/* start from scratch */
	next_requests = 0;
	stat_failures = 0;
	latest_contains_msgnum = next_at_witts_end = 0;
	latest_next = -1;
    }

    if (where <= latest_next) {
	/* no need: NEXT has already skipped beyond this point */
	return 1;
    }

    if (no_stat) {
	sprintf(status,"%d",99);
	/* fake dummy STAT message that will cause an ARTICLE */
	return handle_stat(status,where,group);
    }

    /* enquire article status and message ID */
    sprintf(request, " %ld", where);
    progtitle2(group, request);

    /* enough STATs in a row have failed: consider a NEXT instead */
    if (!first && !no_next && stat_failures >= stat_attempts) {
	if (next_at_witts_end) {
	    /* nothing more to do */
	    return 1;
	}
	while (latest_contains_msgnum && next_requests > 0) {
	    /*
	     *  there are unanswered NEXT requests in the queue,
	     *  followed by an ARTICLE or STAT. this means we must
	     *  wait until we get the answer from the last of
	     *  the NEXT requests. also, a successful STAT response
	     *  will cause us to reconsider re. the NEXT request
	     */
	    int n;
	    log_msg(L_DEBUGMORE,"wait before issuing NEXT");
	    /* the handlers run from here may change stat_failures */
	    if (!(n = process_request())) return 0;

	    if (stat_failures < stat_attempts) {
		/* a good STAT has arrived, so skip the NEXT */
		log_msg(L_DEBUGMORE,"a good STAT arrived, do STAT instead");
		goto issue_a_stat;
	    }

	    if (n < 0) break; /* queue is empty */
	}
	/*
	 *  NOTE(review): stat_failures >= stat_attempts still holds
	 *  here, so a NEXT seems to be issued only if stat_failures
	 *  is 0, i.e. if stat_attempts is not positive - confirm
	 *  that this is intended
	 */
	if (stat_failures > 0) {
	    log_msg(L_DEBUGMORE,"before NEXT: stat_failures %d", stat_failures);
	    goto issue_a_stat;
	}
	sprintf(request, "NEXT%s", newline);
	++next_requests;
	latest_contains_msgnum = 0;
	return put_request(request,handle_next,where,group);
    }

    /* issue a standard STAT */
  issue_a_stat:
    sprintf(request, "STAT %ld%s", where,newline);
    latest_contains_msgnum = 1;
    return put_request(request,handle_stat,where,group);
}

/*
 *  interpret the response to a GROUP request
 *
 *  status: response line from the server
 *  group:  group name, for error reports
 *  firstp, lastp: receive the second and third number of an
 *                 OK response (the first number is discarded)
 *
 *  return 1 if OK, 0 if no group, -1 if no point in continuing
 */
static int 
select_a_group(char *status,char *group,long *firstp,long *lastp)
{
    long count;
    char *tail;
    unsigned long code = strtoul(status,&tail,10);

    if (code == OK_GROUP) {
	/* Group selected */
	if (sscanf(tail,"%ld %ld %ld",&count,firstp,lastp) == 3) {
	    ++fetched_groups;
	    return 1;
	}
	log_msg(L_ERR,"group select bad format: \"%s\"", status);
	return -1;
    }

    if (code == ERR_NOGROUP) {
	/* server haven't seen it before */
	log_msg(L_ERR,"server does not carry group \"%s\"", group);
	return 0;
    }

    if (code == ERR_NOAUTH) {
	/* server won't allow us in here */
	log_msg(L_ERR,"authorization required for group \"%s\"", group);
	return 0;
    }

    /* otherwise, protocol error */
    log_msg(L_ERR,"NNTP group select protocol error: got \"%s\"", status);
    return -1;
}

/*
 *  select a group              
 *  return 1 if OK, 0 if no group, -1 if no point in continuing
 */
int 
select_group(char *group,long *firstp,long *lastp)
{
    char request[NNTP_STRLEN+1];
    char status[NNTP_STRLEN+1];

    progtitle(group);

    sprintf(request, "GROUP %s%s", group,newline);

    if (!put_server(request)) return -1;

    /* get status */
    if (!get_server_nntp(status, sizeof(status))) return -1;

    return select_a_group(status,group,firstp,lastp);
}

