/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
/*
 * Pan - A Newsreader for Gtk+
 * Copyright (C) 2002  Charles Kerr <charles@rebelbase.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; version 2 of the License.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */

#include <config.h>

#include <glib.h>

#include <ctype.h>
#include <limits.h>
#include <stdlib.h>
#include <string.h>

#include <pan/base/acache.h>
#include <pan/base/article.h>
#include <pan/base/article-thread.h>
#include <pan/base/base-prefs.h>
#include <pan/base/debug.h>
#include <pan/base/pan-i18n.h>
#include <pan/base/pan-glib-extensions.h>

/**
 * Skip the "Re: " part of a subject header, if any.
 *
 * The leader is recognized when the first four characters are 'R' or 'r',
 * 'E' or 'e', a colon, and a space.  Only one leader is skipped.
 * The && chain stops at the first mismatch, so characters past the
 * string terminator are never examined.
 *
 * This is a macro: the argument is evaluated more than once, so it must
 * not have side effects.  The argument is fully parenthesized, so any
 * pointer expression (e.g. "str + 1") may be passed.
 *
 * @param a the subject string; may be NULL
 * @return a+4 if the subject begins with a reply leader, otherwise a itself
 */
#define skip_reply_leader(a) \
	((((a)!=NULL) && \
	  ((a)[0]=='R' || (a)[0]=='r') && \
	  ((a)[1]=='E' || (a)[1]=='e') && \
	  ((a)[2]==':') && \
	  ((a)[3]==' ')) ? (a)+4 : (a))

/**
 * Normalized Article
 *
 * Pairs an article with the normalized form of its subject so that the
 * sorting and threading code can compare subjects with a plain strcmp().
 */
typedef struct
{
	/* normalized subject text; normalize_articles() points this into one
	 * shared string block, so it is not freed per-record */
	gchar * subject;
	/* nonzero when the article's subject began with a "Re: " leader */
	gboolean is_reply;
	/* the article this record describes */
	Article * a;
}
Norm;

/**
 * Normalizing a subject header involves tearing out the multipart
 * substrings ("(21/42)" or "[12|213]") and converting it all to
 * one case so that we can use strcmp instead of g_strcasecmp.
 *
 * When we're threading articles, it's a big speedup to normalize the
 * subjects at the outset instead of normalizing them in each comparison.
 */
static size_t
normalize_subject (char * buf, const Article * a)
{
	static gboolean _inited = FALSE;
	static gboolean _keep_chars[UCHAR_MAX+1];
	register const guchar * in = (guchar*) skip_reply_leader (a->subject);
	register char * out = buf;
	const gboolean multipart = a->parts != 0;

	/* populate the _keep_chars array */
	if (!_inited) {
		int i;
		for (i=0; i<=UCHAR_MAX; ++i) {
			guchar uch = (guchar)i;
			_keep_chars[i] = isalnum(uch) || isdigit(uch) || isspace(uch);
		}
		_inited = TRUE;
	}

	/* skip the leading noise */
	while (*in && !isalnum(*in))
		++in;

	while (*in)
	{
		/* strip multipart information */
		if (multipart && (*in=='('||*in=='[') && isdigit(in[1])) {
			const guchar * start = in;
			while (*in && *in!=']' && *in!=')') {
				if (isalpha(*in)) { /* oops, not multipart information */
					in = ++start;
					break;
				} 
				++in;
			}
			continue;
		}

		/* strip out junk that breaks sorting */
		if (_keep_chars[*in])
			*out++ = tolower(*in);

		++in;
	}

	*out = '\0';
	return out - buf;
}


/**
 * Build a Norm record for each of a group of articles, using just two
 * memory blocks: one array of Norm records and one character block that
 * holds every normalized subject back to back.
 *
 * Both blocks need to be g_free()d by the client when it is done with them.
 * If any sanity check fails, neither output pointer is written.
 *
 * @param articles the articles to normalize
 * @param qty the number of articles; must be greater than zero
 * @param alloc_and_setme_norm receives the newly allocated Norm array
 * @param alloc_and_setme_str receives the newly allocated string block
 */
static void
normalize_articles (Article    ** articles,
                    gint          qty,
                    Norm       ** alloc_and_setme_norm,
                    gchar      ** alloc_and_setme_str)
{
	gint idx;
	glong bytes_needed = 0;
	gchar * cursor;
	Norm * records;

	/* sanity clause */
	g_return_if_fail (articles!=NULL);
	g_return_if_fail (qty>0);
	g_return_if_fail (articles_are_valid ((const Article **)articles, qty));
	g_return_if_fail (alloc_and_setme_norm!=NULL);
	g_return_if_fail (alloc_and_setme_str!=NULL);

	/* one block for the records */
	records = g_new (Norm, qty);
	*alloc_and_setme_norm = records;

	/* one block for the text: a normalized subject is never longer
	 * than the original, so original length plus slack is enough */
	for (idx=0; idx<qty; ++idx)
		bytes_needed += strlen (articles[idx]->subject) + 2;
	cursor = g_new (char, bytes_needed);
	*alloc_and_setme_str = cursor;

	/* fill in the records, packing the subjects one after another */
	for (idx=0; idx<qty; ++idx) {
		Norm * rec = &records[idx];
		Article * article = articles[idx];

		rec->a = article;
		rec->is_reply = skip_reply_leader (article->subject) != article->subject;
		rec->subject = cursor;
		cursor += normalize_subject (cursor, article) + 1;
	}
}


/**
 * Comparison function for ordering the children of a threaded article.
 * thread_articles() hands it to g_slist_sort(), so both arguments are
 * Article pointers (not pointers to pointers).
 *
 * Keys, in order:
 *  1. part number, when both articles claim the same total number of parts
 *  2. subject, compared with strcmp()
 *  3. date, oldest first
 *
 * @return negative, zero, or positive as a sorts before, with, or after b
 */
static int
compare_pA_to_pA_by_part (const void * va, const void * vb)
{
	int             value;
	const Article * a = (const Article *)va;
	const Article * b = (const Article *)vb;

	/* check multipart first.  Only a real difference in part number
	 * decides the order: on a tie (for example two ordinary replies,
	 * whose part fields match) fall through so that the remaining keys
	 * still get a say instead of reporting the two as equal. */
	if (a->parts == b->parts && (value = a->part - b->part) != 0)
		return value;

	/* order by subject */
	if ((value = strcmp (a->subject, b->subject)) != 0)
		return value;

	/* the subjects are byte-for-byte identical here, so either both
	 * carry a reply leader or neither does; there is nothing to gain
	 * from comparing reply status */

	/* oldest goes first... */
	if (a->date < b->date)
		return -1;
	if (a->date > b->date)
		return 1;
	return 0;
}

/**
 * Comparison function over Norm records, used both for sorting and for
 * searching the sorted array.
 *
 * Keys, in order: normalized subject, reply status (a reply sorts after
 * a non-reply), part number, and finally date with the oldest first.
 *
 * @return negative, zero, or positive as va sorts before, with, or after vb
 */
static int
compare_pN_to_pN_by_subject (const void * va, const void * vb)
{
	const Norm * lhs = (const Norm *)va;
	const Norm * rhs = (const Norm *)vb;
	int diff;

	/* cheap test on the first character before paying for strcmp() */
	diff = *lhs->subject - *rhs->subject;
	if (diff != 0)
		return diff;

	/* subject is the primary key */
	diff = strcmp (lhs->subject, rhs->subject);
	if (diff != 0)
		return diff;

	/* if one but not both is a reply, the reply goes second */
	if (lhs->is_reply != rhs->is_reply)
		return lhs->is_reply ? 1 : -1;

	/* lower part numbers first */
	diff = lhs->a->part - rhs->a->part;
	if (diff != 0)
		return diff;

	/* oldest goes first */
	if (lhs->a->date < rhs->a->date)
		return -1;
	if (lhs->a->date > rhs->a->date)
		return 1;
	return 0;
}


/**
 * Comparison function for an array of Article pointers: orders by the
 * value of article_get_combined_linecount(), smallest first.
 */
static int
compare_ppA_to_ppA_by_linecount (const void* va, const void* vb)
{
	const Article * first  = *(const Article**)va;
	const Article * second = *(const Article**)vb;

	return article_get_combined_linecount (first)
	     - article_get_combined_linecount (second);
}

/**
 * Comparison function for an array of Article pointers: orders by
 * "action state".  Articles flagged STATE_DECODED come first; among the
 * rest, articles whose message is present in the article cache come
 * before those that are not.
 *
 * Sorting on the save/download queue state is currently disabled; see
 * the #warning below.
 */
static int
compare_ppA_to_ppA_by_action (const void * va, const void * vb)
{
	const Article * lhs = *(const Article **)va;
	const Article * rhs = *(const Article **)vb;
	int lhs_rank, rhs_rank;

#if 0
	lhs_rank = article_flag_on (lhs, STATE_SAVE_QUEUED) ? 1 : 0;
	rhs_rank = article_flag_on (rhs, STATE_SAVE_QUEUED) ? 1 : 0;
	if (lhs_rank != rhs_rank)
		return rhs_rank - lhs_rank;

	lhs_rank = article_flag_on (lhs, STATE_DOWNLOAD_FLAGGED) ? 1 : 0;
	rhs_rank = article_flag_on (rhs, STATE_DOWNLOAD_FLAGGED) ? 1 : 0;
	if (lhs_rank != rhs_rank)
		return rhs_rank - lhs_rank;
#else
#warning fixme -- how to sort on queue?
#endif

	/* decoded articles sort ahead of undecoded ones */
	lhs_rank = article_flag_on (lhs, STATE_DECODED) ? 1 : 0;
	rhs_rank = article_flag_on (rhs, STATE_DECODED) ? 1 : 0;
	if (lhs_rank != rhs_rank)
		return rhs_rank - lhs_rank;

	/* then cached articles ahead of uncached ones; the difference is
	 * zero when both are in the same state */
	lhs_rank = acache_has_message (lhs->message_id) ? 1 : 0;
	rhs_rank = acache_has_message (rhs->message_id) ? 1 : 0;
	return rhs_rank - lhs_rank;
}

static int
compare_ppA_to_ppA_by_read (const void * va, const void * vb)
{
	const register Article * a = *(const Article **)va;
	const register Article * b = *(const Article **)vb;
	const gboolean a_is_read = article_is_read (a);
	const gboolean b_is_read = article_is_read (b);
	const gboolean a_is_new = article_is_new (a);
	const gboolean b_is_new = article_is_new (b);
	gint ia, ib;

	ia = (a_is_new?1:0) + a->new_children;
	ib = (b_is_new?1:0) + b->new_children;
	if (ia != ib)
		return ib - ia;

	ia = (a_is_read?0:1) + a->unread_children;
	ib = (b_is_read?0:1) + b->unread_children;
	if (ia != ib)
		return ib - ia;

	ia = a_is_read ? 0 : 1;
	ib = b_is_read ? 0 : 1;
	if (ia != ib)
		return ib - ia;

	ia = article_flag_on (a, STATE_MULTIPART_ALL) ? 1 : 0;
	ib = article_flag_on (b, STATE_MULTIPART_ALL) ? 1 : 0;
	if (ia != ib)
		return ib - ia;

	ia = article_flag_on (a, STATE_MULTIPART_SOME) ? 1 : 0;
	ib = article_flag_on (b, STATE_MULTIPART_SOME) ? 1 : 0;
	if (ia != ib)
		return ib - ia;

	return 0;
}


/**
 * Comparison function for an array of Article pointers: orders by date,
 * oldest first.
 *
 * @return -1, 0, or 1
 */
static int
compare_ppA_to_ppA_by_date (const void* va, const void* vb)
{
	const time_t when_a = (*(const Article**)va)->date;
	const time_t when_b = (*(const Article**)vb)->date;

	/* each relational test yields 0 or 1, so the difference is -1, 0 or 1 */
	return (when_a > when_b) - (when_a < when_b);
}

static int
compare_ppA_to_ppA_by_message_id (const void* a, const void* b)
{
	const gchar * msg_id_a = (*(const Article**)a)->message_id;
	const gchar * msg_id_b = (*(const Article**)b)->message_id;
	return strcmp (msg_id_a, msg_id_b);
}

/**
 * Sort record used by sort_articles() when sorting by author: a
 * fixed-size text key stored alongside the article it was derived from,
 * so that the key is computed once per article rather than once per
 * comparison.
 */
typedef struct
{
	/* sort key; sort_articles() fills it with the lowercased short author string */
	char name[128];
	/* the article the key belongs to */
	Article * article;
}
ArticleStruct;

/**
 * Comparison function for an array of ArticleStruct records: orders by
 * the strcmp() ordering of the records' name keys.
 */
static int
compare_pAS_to_pAS_by_data (const void * va, const void * vb)
{
	const char * lhs_name = ((const ArticleStruct*)va)->name;
	const char * rhs_name = ((const ArticleStruct*)vb)->name;

	return strcmp (lhs_name, rhs_name);
}

/**
 * Sort an array of article pointers in place.
 *
 * @param buf the array of articles to sort
 * @param article_qty the number of articles in buf; zero is a no-op
 * @param sort_type one of the ARTICLE_SORT_* values; anything unrecognized
 *                  is treated as ARTICLE_SORT_SUBJECT
 * @param ascending if FALSE, the sorted array is reversed before returning
 */
void
sort_articles (Article      ** buf,
               size_t          article_qty,
               int             sort_type,
               gboolean        ascending)
{
	g_return_if_fail (articles_are_valid ((const Article **)buf, article_qty));

	if (!article_qty)
		return;

	switch (sort_type)
	{
		case ARTICLE_SORT_AUTHOR:
		{
			/* compute each article's author key once, sort the
			 * (key, article) records, then copy the order back */
			size_t i;
			ArticleStruct * as = g_new (ArticleStruct, article_qty);
			for (i=0; i<article_qty; ++i)
			{
				as[i].article = buf[i];
				article_get_short_author_str (buf[i], as[i].name, sizeof(as[i].name));
				g_strdown (as[i].name);
			}
			msort (as,
			       article_qty,
			       sizeof(ArticleStruct),
			       compare_pAS_to_pAS_by_data);
			for (i=0; i<article_qty; ++i)
				buf[i] = as[i].article;
			g_free (as);
			break;
		}
		case ARTICLE_SORT_LINES:
		{
			msort (buf, article_qty, sizeof(Article*), compare_ppA_to_ppA_by_linecount);
			break;
		}
		case ARTICLE_SORT_DATE:
		{
			msort (buf, article_qty, sizeof(Article*), compare_ppA_to_ppA_by_date);
			break;
		}
		case ARTICLE_SORT_MSG_ID:
		{
			msort (buf, article_qty, sizeof(Article*), compare_ppA_to_ppA_by_message_id);
			break;
		}
		case ARTICLE_SORT_ACTION_STATE:
		{
			msort (buf, article_qty, sizeof(Article*), compare_ppA_to_ppA_by_action);
			break;
		}
		case ARTICLE_SORT_READ_STATE:
		{
			msort (buf, article_qty, sizeof(Article*), compare_ppA_to_ppA_by_read);
			break;
		}
		case ARTICLE_SORT_SUBJECT:
		default:
		{
			/* normalize every subject once up front, sort the
			 * normalized records, then copy the order back */
			size_t i;
			Norm * norm_buf = NULL;
			char * str_buf = NULL;
			normalize_articles (buf, article_qty, &norm_buf, &str_buf);

			/* normalize_articles() leaves its outputs untouched when
			 * it rejects its arguments; there is nothing to sort then,
			 * so leave buf exactly as it was handed to us */
			if (norm_buf == NULL) {
				g_free (str_buf);
				return;
			}

			msort (norm_buf, article_qty, sizeof(Norm), compare_pN_to_pN_by_subject);
			for (i=0; i<article_qty; ++i)
				buf[i] = ARTICLE(norm_buf[i].a);
			g_free (norm_buf);
			g_free (str_buf);
			break;
		}
	}

	/* if not ascending, reverse the order */
	if (!ascending) {
		const size_t mid = article_qty/2;
		size_t i;
		for (i=0; i!=mid; ++i) { /* swap */
			Article * tmp = buf[i];
			buf[i] = buf[article_qty-1-i];
			buf[article_qty-1-i] = tmp;
		}
	}
}


/**
 * Walk up the parent chain from child looking for parent.
 *
 * Note that an article counts as a child of itself: the walk starts at
 * child, so the result is TRUE when child == parent.
 *
 * @return TRUE if parent is child itself or one of its ancestors;
 *         FALSE otherwise, or if either argument is NULL
 */
static gboolean
is_child_of (const Article * child,
             const Article * parent)
{
	const Article * node;

	g_return_val_if_fail (child!=NULL, FALSE);
	g_return_val_if_fail (parent!=NULL, FALSE);

	for (node=child; node!=NULL; node=node->parent)
		if (node == parent)
			return TRUE;

	return FALSE;
}

/**
 * Work out the multipart state of an article from its own part fields
 * and from the children in its threads list.
 *
 * @return 0, STATE_MULTIPART_SOME, or STATE_MULTIPART_ALL
 */
static guint
article_get_part_state (Article * a)
{
	GSList * node;
	gint expected;

	/* not a multipart */
	if (a->parts<1)
		return 0;

	/* someone's posted a "00/124" nfo message */
	if (a->part==0)
		return 0;

	/* incomplete multipart: a later part with nothing above it */
	if (a->part>1 && a->parent==NULL)
		return STATE_MULTIPART_SOME;

	/* someone's posted a followup to a multipart */
	if (a->linecount<250 && !g_strncasecmp(a->subject,"Re:", 3))
		return 0;

	/* a multipart: walk the children in list order, advancing the
	 * expected part number each time the next part of this set turns up */
	expected = a->part + 1;
	for (node=a->threads; node!=NULL; node=node->next) {
		Article * child = ARTICLE(node->data);
		if (child->parts==a->parts && child->part==expected)
			++expected;
	}

	/* every part through the last one was seen exactly when the
	 * expected number has run one past the total */
	return expected==a->parts+1 ? STATE_MULTIPART_ALL : STATE_MULTIPART_SOME;
}

/**
 * OR the given state bits into an article and into each child that is
 * the next consecutive part of the same multipart set.
 *
 * Children are visited in list order; a child qualifies when its parts
 * count matches the article's and its part number is the next one expected.
 *
 * @param a the article heading the multipart
 * @param state the bits to set
 */
static void
set_children_part_state (Article * a, guint state)
{
	GSList * node = a->threads;
	gint expected = a->part + 1;

	a->state |= state;

	while (node != NULL) {
		Article * child = ARTICLE(node->data);

		if (child->parts==a->parts && child->part==expected) {
			child->state |= state;
			++expected;
		}

		node = node->next;
	}
}

/**
 * Thread the articles specified in the array.
 *
 * Every article's existing thread links and child counts are discarded,
 * then each article is given a parent by the first of these that succeeds:
 *
 *  1. by reference: walk the References header from its last message-id
 *     to its first, looking each one up among the articles passed in
 *     (skipped for articles whose parts field is 2 or more)
 *  2. by multipart: for parts 2..n, search for part 1 of the same
 *     normalized subject with the same parts count
 *  3. by subject: for subjects with a "Re: " leader, search for another
 *     article with the same normalized subject
 *
 * Afterwards each article's child list is sorted, the unread/new child
 * counts are recomputed, and the STATE_MULTIPART_* bits are refreshed.
 *
 * @param articles the articles to thread; must not be NULL
 */
void
thread_articles (GPtrArray    * articles)
{
	guint i;
	guint qty;
	Article search_a;
	gchar * norm_str_buf;
	Norm * norm;
	Norm * sorted_norm;
	GArray * buf = NULL;
	GString * tmp_references;
	GHashTable * ref_hash;

	/* sanity clause */
	g_return_if_fail (articles!=NULL);
	g_return_if_fail (articles_are_valid ((const Article**)articles->pdata, articles->len));

	/* only read the length once the pointer is known to be good */
	qty = articles->len;
	if (qty<1)
		return;

	/* scratch buffer for normalizing a candidate parent's subject */
	if (break_thread_when_subject_changes)
		buf = g_array_new (FALSE, FALSE, 1);

	/* make a plausibly-legal article to use as a search key.
	 * zero it first so that no field is left indeterminate. */
	memset (&search_a, 0, sizeof(search_a));
	search_a.number = 1;
	search_a.subject = "dummy subject";
	search_a.references = NULL;

	/* index the articles by message-id.  the keys are the articles'
	 * own message_id strings, so the table owns nothing. */
	ref_hash = g_hash_table_new (g_str_hash, g_str_equal);
	for (i=0; i<qty; ++i) {
		Article * a = ARTICLE (g_ptr_array_index (articles, i));
		g_hash_table_insert (ref_hash, (gpointer)a->message_id, a);
	}

	/* normalize the articles */
	norm = NULL;
	norm_str_buf = NULL;
	normalize_articles ((Article**)articles->pdata, qty, &norm, &norm_str_buf);

	/* normalize_articles() leaves its outputs untouched when it rejects
	 * its arguments; without the records there is nothing we can do */
	if (norm == NULL) {
		g_free (norm_str_buf);
		g_hash_table_destroy (ref_hash);
		if (buf != NULL)
			g_array_free (buf, TRUE);
		return;
	}

	/* sort a copy of the normalized articles; norm[] itself stays in
	 * the same order as the articles array so norm[i] describes
	 * article i */
	sorted_norm = g_memdup (norm, sizeof(Norm)*qty);
	qsort (sorted_norm, qty, sizeof(Norm), compare_pN_to_pN_by_subject);

	/* unthread the articles, just in case they were threaded before */
	for (i=0; i!=qty; ++i) {
		Article * a = ARTICLE(g_ptr_array_index(articles,i));
		a->parent = NULL;
		a->unread_children = 0;
		a->new_children = 0;
		g_slist_free (a->threads);
		a->threads = NULL;
	}


	/* thread the articles */
	tmp_references = g_string_new (NULL);
	for (i=0; i!=qty; ++i)
	{
		const gchar * references;
		Article * parent = NULL;
		Article * a = ARTICLE(g_ptr_array_index(articles,i));
		gint index = -1;

		/* thread by reference
		   (except for parts 2...n of multiparts, which need to be threaded by multipart) */
		references = a->references;
		if (a->parts<2 && references!=NULL && *references=='<')
		{
			gchar * message_id;

			/* work on a private copy: the loop below truncates it */
			g_string_assign (tmp_references, references);
			message_id = strrchr (tmp_references->str, '<');

			while (parent==NULL && is_nonempty_string(message_id))
			{
				Article * match = (Article*) g_hash_table_lookup (ref_hash, message_id);

				/* if we found the ancestor & it's worthy, thread it.
				 * the is_child_of() test keeps us from creating a loop. */
				if (match!=NULL && !is_child_of(match,a))
				{
					gboolean subject_changed = FALSE;

					if (break_thread_when_subject_changes)
					{
						const char * new_subject = norm[i].subject;
						const char * old_subject = article_get_subject (match);
						g_array_set_size (buf, strlen(old_subject)+1);
						normalize_subject (buf->data, match);
						subject_changed = strcmp (buf->data, new_subject) != 0;
					}

					if (!subject_changed)
						parent = match;
				}

				/* march up the References string: chop off the
				 * message-id just tried, trim the whitespace that
				 * preceded it, and find the one before it.
				 * (harmless if a parent was found; the loop ends.) */
				*message_id = '\0';
				g_strchomp (tmp_references->str);
				message_id = strrchr (tmp_references->str, '<');
			}
		}


		/* thread by multipart */
		if (!parent && a->parts>1 && a->part>1)
		{
			/* the key keeps this article's normalized subject and
			 * reply flag but stands in a dummy article claiming
			 * to be part 1 */
			Norm n = norm[i];
			search_a.part = 1;
			search_a.date = 0; /* unlikely to get an exact match.. :) */
			n.a = &search_a;

			index = lower_bound (&n,
			                     sorted_norm,
			                     qty,
			                     sizeof(Norm),
			                     compare_pN_to_pN_by_subject,
			                     NULL);

			if (0<=index && index<qty)
			{
				Norm * match = &sorted_norm[index];
				if ((match->a != a)
					&& (match->a->parts == a->parts)
					&& (!strcmp(match->subject,n.subject))
					&& (!is_child_of(match->a,a)))
				{
					parent = match->a;
				}
			}
		}

		/* thread by subject */
		if (!parent && skip_reply_leader(a->subject)!=a->subject)
		{
			/* NOTE(review): the key inherits is_reply from this
			 * article, which is always set in this branch, and the
			 * comparator orders replies after non-replies -- confirm
			 * that the search can still land on the original post. */
			Norm n = norm[i];
			search_a.part = 0;
			search_a.date = 0; /* unlikely to get an exact match.. :) */
			n.a = &search_a;

			index = lower_bound (
				&n,
				sorted_norm,
				qty,
				sizeof(Norm),
				compare_pN_to_pN_by_subject,
				NULL);

			if (0<=index && index<qty && !is_child_of(sorted_norm[index].a,a))
			{
				Norm * match = &sorted_norm[index];

				if (!strcmp(match->subject,n.subject))
				{
					/* 1 original, 1 reply */
					parent = match->a;
				}
				/* NOTE(review): this compares a normalized subject
				 * against the raw a->subject -- confirm intent. */
				else if (!strcmp(match->subject, a->subject) && (match->a->date<a->date))
				{
					/* 2 replies, no top --  oldest on top */
					parent = match->a;
				}
			}
		}

		if (parent != NULL) /* this article has a parent */
		{
			g_assert (!is_child_of(parent,a));

			/* link the two articles */
			a->parent = parent;
			parent->threads = g_slist_prepend (parent->threads, norm[i].a);
		}
	}

	/* the child lists were built by prepending; put each in order */
	for (i=0; i!=qty; ++i) {
		Article * a = ARTICLE(g_ptr_array_index(articles,i));
		a->threads = g_slist_sort (a->threads, compare_pA_to_pA_by_part);
	}

	/* calculate new/unread child counts: each unread (or new) article
	 * that has a parent adds one to every one of its ancestors */
	for (i=0; i!=qty; ++i)
	{
		Article * a;

		a = ARTICLE(g_ptr_array_index(articles,i));
		if (a->parent!=NULL && !article_is_read(a))
			for (a=a->parent; a!=NULL; a=a->parent)
				++a->unread_children;

		a = ARTICLE(g_ptr_array_index(articles,i));
		if (a->parent!=NULL && article_is_new(a))
			for (a=a->parent; a!=NULL; a=a->parent)
				++a->new_children;
	}

	/* check multipart count: clear the old bits everywhere, then let
	 * each top-level article stamp itself and its parts */
	for (i=0; i!=articles->len; ++i) {
		Article * a = ARTICLE(g_ptr_array_index(articles,i));
		a->state &= ~(STATE_MULTIPART_ALL|STATE_MULTIPART_SOME);
		if (a->parent == NULL)
			set_children_part_state (a, article_get_part_state(a));
	}

	/* cleanup */
	g_hash_table_destroy (ref_hash);
	g_free (norm);
	g_free (norm_str_buf);
	g_free (sorted_norm);
	g_string_free (tmp_references, TRUE);
	if (buf != NULL)
		g_array_free (buf, TRUE);
}
