/* $Id: junk_filter.h,v 1.16 1998/06/14 05:17:25 dps Exp $ */

#ifndef __JUNK_FILTER_H__
#define __JUNK_FILTER_H__
#include <iostream.h>
#include <fstream.h>
#include <stdlib.h>
#include "tune.h"

/*
 * word_junk_filter: a read-only streambuf layered over an istream.
 *
 * The data source is the istream `is`; data is delivered to the reader
 * through underflow(), which is defined out of line.  Writing is not
 * supported: overflow() always reports EOF.
 *
 * Ownership: the filter takes ownership of the istream it is given (by
 * constructor or by open()) and deletes it when it is replaced or when
 * the filter is destroyed.  Callers must pass a heap-allocated stream
 * and must not delete it themselves.
 *
 * NOTE(review): because the destructor deletes `is`, copying an instance
 * would lead to the same stream being deleted twice -- do not copy.
 *
 * NOTE(review): `unicode`, `text_size` and `junk_size` are not set by
 * anything visible in this header; presumably do_reset() or
 * skip_to_start() initialises them -- confirm in the implementation.
 */
class word_junk_filter: public streambuf
{
private:
    enum { INIT_NONE, INIT_SKIPPED, INIT_DONE } init; /* Init stage */
    istream *is;		/* Data source (owned, may be NULL) */
    enum
    {
	__UNI_PROBE,		/* Unknown, probe */
	__UNI_NO,		/* No */
	__UNI_YES_LE,		/* Yes, little endian */
	__UNI_YES_BE		/* Yes, big endian */
    } unicode; /* Unicode flag */

    char junk_end_buf[RESUME_CHARS+1]; /* Junk end characters */
    int junk_end_usage;		/* Usage of junk end buffer */
    int text_size;		/* Text characters retrieved */
    int junk_size;		/* Amount of junk skipped */

    char buf[JUNK_FILT_BUFSIZE+2];
    enum {
	NORMAL, UNICODE_Z,	/* Unicode, Unicode zero expected */
	SKIP_JUNK, SKIP_JUNK_WASPRN, /* Skip junk, Skip junk was printable */
	SINK_JUNK		/* Like skip junk with more restart */
    } mode;
    char save;
    int ns;

    /* Tunable stuff */
    struct
    {
	int st_min_zeros;	  // Minimum leading zeros
	int st_min_ff;		  // Minimum leading ff characters
	int st_rej_limit;	  // Non-printing characters tolerated
	int non_unicode_st;	  // Not unicode start threshold
	int unicode_st;		  // Unicode start threshold
	int max_junk;		  // Junk required to trigger SINK_JUNK mode
	int min_text;		  // Text required to trigger SINK_JUNK mode
	int max_top_run;	  // Max top bit set run
	struct
	{
	    unsigned unicode_aggresive: 1; // Aggressive unicode mode
	    unsigned ff_intro:1;  // Allow ff leadin characters
	} options;
	int resume_chars;	  // Good characters needed to stop junk skipping
    } tune;
    void set_dfl_tuning(void);	  // Set default tuning

    int __fill(const char *, int, int, int);
    int filter_junk(const char *, int);
    int skip_to_start(void);
    long pos;

    /*
     * Put the per-stream state back to its "nothing read yet" values.
     * Shared by every constructor and by open() so that they cannot
     * drift apart.  Does not touch `is` or the tuning parameters.
     */
    inline void reset_state(void)
    {
	pos=0;
	init=INIT_NONE;
	mode=NORMAL;
	junk_end_usage=0;
	ns=0;
    }

public:
    /* Output is not supported: every attempt to write reports EOF. */
    inline int overflow(int ch)
    {
	(void) ch;		/* Unused */
	return EOF; /* ZXZ */
    }

    /* Nothing to flush; always reports success (0). */
    inline int sync(void)
    {
	return 0;
    }

    int underflow(void);
    int do_reset(void);
    inline int __uflow(void) { return this->underflow(); }

    /*
     * Switch the filter to a new data source and reset the per-stream
     * state.  The previously held stream (if any) is deleted and
     * ownership of `s` passes to the filter.  Tuning is left unchanged.
     */
    inline void open(istream *s)
    {
	/* Re-opening the stream we already hold must not delete it,
	   otherwise `is` would be left dangling. */
	if (is!=s)
	    delete is;		/* delete of NULL is a no-op */
	is=s;
	reset_state();
    }

    /* Create a filter with default tuning and no data source; attach
       one later with open(). */
    inline word_junk_filter(void)
    {
	set_dfl_tuning();	  // Set default tuning
	is=NULL;
	reset_state();
    }

    /* Create a filter with default tuning reading from `s`; the filter
       takes ownership of `s`. */
    inline word_junk_filter(istream *s)
    {
	set_dfl_tuning();	  // Set default tuning
	is=s;
	reset_state();
    }

    /*
     * Create a filter with default tuning reading from the file named
     * `f`.  If the file cannot be opened a message is written to cerr
     * and the process exits with status 1.
     */
    inline word_junk_filter(const char *f)
    {
	ifstream *s;

	set_dfl_tuning();	  // Set default tuning
	s=new ifstream(f);
	if (!(*s))
	{
	    cerr<<"Can not open "<<f<<" (fatal)\n";
	    exit(1);
	}
	is=s;
	reset_state();
    }

    /* Release the owned data source, if any. */
    inline ~word_junk_filter(void)
    {
	delete is;		/* delete of NULL is a no-op */
    }
};

#endif /* __JUNK_FILTER_H__ */
