ze-filter  (ze-filter-0.8.0-develop-180218)
Filter : Bayesian filter

Classes

struct  sfilter_vsm_T
 
struct  sfilter_cli_T
 
struct  bfilter_T
 
struct  sfilter_token_T
 
struct  msg_btsm_T
 
struct  feature_T
 
struct  tokconf_T
 

Macros

#define MAX_TOK   128
 
#define SFILTER_VSM_INIT(bc, nbt, prob)
 
#define BFLAG_TRFTOK   2
 
#define BFLAG_DEFAULT   (BFLAG_TRFTOK)
 
#define BFHISTODIM   21
 
#define CL_TYPE_NP   0
 
#define CL_TYPE_NB   1
 
#define CL_FS_EP   0
 
#define CL_FS_MI   1
 
#define CF_FS_TFIDF   2
 
#define BFILTER_INITIALIZER
 
#define UT_PROB   0.5
 
#define BFILTER_LOCK()   MUTEX_LOCK(&(bfilter.mutex))
 
#define BFILTER_UNLOCK()   MUTEX_UNLOCK(&(bfilter.mutex))
 
#define MSG_BTSM_INITIALIZER
 
#define FEATURE_WORD   0
 
#define FEATURE_NGRAM   1
 
#define _BODY   0
 
#define _HTML   1
 
#define _HEADER   2
 
#define _CTYPE   3
 
#define _CDISP   4
 
#define _RCVD   5
 
#define _FROM   6
 
#define _MAILER   7
 
#define SEP_TOK   " \t\n\r,=&?\"()[]{}<>;~/"
 
#define SEP_WS   " \t\n\r"
 
#define TOKCONF_INITIALIZER   {"body", "body", TRUE, NULL, NULL, TRUE}
 
#define ADD_TOKEN(bm, prefix, token)
 
#define X_HTML_SEP   " -x- "
 
#define NORM_FILENAME(fname)
 

Typedefs

typedef struct sfilter_vsm_T sfilter_vsm_T
 
typedef struct sfilter_cli_T sfilter_cli_T
 
typedef struct sfilter_token_T sfilter_token_T
 
typedef int(* btsm_browse_F) (void *, void *)
 
typedef int(* smodel_db_browse_F) (void *, void *, void *)
 
typedef struct msg_btsm_T msg_btsm_T
 

Functions

bool sfilter_cli_handle_message (char *fname, int msgNb, void *arg)
 
double sfilter_check_message (char *id, char *fname, sfilter_vsm_T *bcheck)
 
bfilter_T * bfilter_ptr ()
 
bool bfilter_init (char *dbname)
 
bool bfilter_db_open ()
 
bool bfilter_db_reopen ()
 
bool bfilter_close ()
 
bool bfilter_ok ()
 
uint32_t set_bfilter_flags (uint32_t flags)
 
bool set_bfilter_logit (bool enable)
 
bool set_bfilter_ham_spam_ratio (double ratio)
 
bool set_bfilter_unknown_token_prob (double prob)
 
bool set_bfilter_nb_tokens (int nbt)
 
bool set_bfilter_max_sizes (size_t msg, size_t mime)
 
bool set_bfilter_db_crypt (int crypt)
 
uint32_t get_bfilter_flags ()
 
double get_bfilter_ham_spam_ratio ()
 
double get_bfilter_unknown_token_prob ()
 
int get_bfilter_nb_tokens ()
 
size_t get_bfilter_max_sizes ()
 
int get_bfilter_db_crypt ()
 
bool smodel_db_check_token (char *key, sfilter_token_T *token)
 
void smodel_db_info (char *prefix, smodel_db_browse_F func, void *arg)
 
void set_tokconf_active (char *tag, bool active)
 
bool bfilter_handle_message (char *id, char *fname, btsm_browse_F func, void *arg)
 
double information_gain (double nms, double nmh, double nts, double nth)
 

Detailed Description

Statistical Bayesian filter

Macro Definition Documentation

◆ _BODY

#define _BODY   0

Definition at line 82 of file ze-bsegmentation.c.

◆ _CDISP

#define _CDISP   4

Definition at line 86 of file ze-bsegmentation.c.

◆ _CTYPE

#define _CTYPE   3

Definition at line 85 of file ze-bsegmentation.c.

◆ _FROM

#define _FROM   6

Definition at line 88 of file ze-bsegmentation.c.

◆ _HEADER

#define _HEADER   2

Definition at line 84 of file ze-bsegmentation.c.

◆ _HTML

#define _HTML   1

Definition at line 83 of file ze-bsegmentation.c.

◆ _MAILER

#define _MAILER   7

Definition at line 89 of file ze-bsegmentation.c.

◆ _RCVD

#define _RCVD   5

Definition at line 87 of file ze-bsegmentation.c.

◆ ADD_TOKEN

#define ADD_TOKEN (   bm,
  prefix,
  token 
)
Value:
do { \
if (strlen(token) > 3) { \
char tstr[512]; \
\
if (prefix != NULL) \
snprintf(tstr, sizeof(tstr), "%s--%s", prefix, token); \
else \
snprintf(tstr, sizeof(tstr), "%s--%s", "GLOB", token); \
if (!msg_btsm_add_token(bm, tstr)) \
ZE_LogMsgError(0, "ERROR inserting new token"); \
} \
} while (0)

Definition at line 332 of file ze-bsegmentation.c.

◆ BFHISTODIM

#define BFHISTODIM   21

Definition at line 36 of file ze-bfilter.h.

◆ BFILTER_INITIALIZER

#define BFILTER_INITIALIZER
Value:
{ \
SIGNATURE, \
FALSE, \
PTHREAD_MUTEX_INITIALIZER, \
ZEDB_INITIALIZER, \
NULL, 0, \
0, 0, 0., 0., \
0, 0, 0., 0., \
0, 0, 0., 0., 0, \
1.0, \
UT_PROB, \
64, \
HASH_PLAIN, \
BFLAG_DEFAULT, \
400000, \
40000, \
FALSE, \
TRUE, \
TRUE, \
CL_TYPE_NP, \
CL_FS_EP, \
0 \
}

Definition at line 100 of file ze-bfilter.h.

◆ BFILTER_LOCK

#define BFILTER_LOCK ( )    MUTEX_LOCK(&(bfilter.mutex))

Definition at line 52 of file ze-bfilter.c.

◆ BFILTER_UNLOCK

#define BFILTER_UNLOCK ( )    MUTEX_UNLOCK(&(bfilter.mutex))

Definition at line 53 of file ze-bfilter.c.

◆ BFLAG_DEFAULT

#define BFLAG_DEFAULT   (BFLAG_TRFTOK)

Definition at line 34 of file ze-bfilter.h.

◆ BFLAG_TRFTOK

#define BFLAG_TRFTOK   2

Definition at line 32 of file ze-bfilter.h.

◆ CF_FS_TFIDF

#define CF_FS_TFIDF   2

Definition at line 97 of file ze-bfilter.h.

◆ CL_FS_EP

#define CL_FS_EP   0

Definition at line 95 of file ze-bfilter.h.

◆ CL_FS_MI

#define CL_FS_MI   1

Definition at line 96 of file ze-bfilter.h.

◆ CL_TYPE_NB

#define CL_TYPE_NB   1

Definition at line 93 of file ze-bfilter.h.

◆ CL_TYPE_NP

#define CL_TYPE_NP   0

Definition at line 92 of file ze-bfilter.h.

◆ FEATURE_NGRAM

#define FEATURE_NGRAM   1

Definition at line 54 of file ze-bsegmentation.c.

◆ FEATURE_WORD

#define FEATURE_WORD   0

Definition at line 53 of file ze-bsegmentation.c.

◆ MAX_TOK

#define MAX_TOK   128

Definition at line 33 of file ze-bcheck.h.

◆ MSG_BTSM_INITIALIZER

#define MSG_BTSM_INITIALIZER
Value:
{ \
JBT_INITIALIZER, \
NULL, \
NULL \
}

Definition at line 46 of file ze-bsegmentation.c.

◆ NORM_FILENAME

#define NORM_FILENAME (   fname)
Value:
do { \
if (fname != NULL) \
{ \
char *px = fname; \
\
for (px = fname; *px != '\0'; px++) \
{ \
if (isdigit(*px)) \
*px = '0'; \
if (isspace(*px)) \
*px = '_'; \
} \
} \
} while (0)

Definition at line 569 of file ze-bsegmentation.c.

◆ SEP_TOK

#define SEP_TOK   " \t\n\r,=&?\"()[]{}<>;~/"

Definition at line 127 of file ze-bsegmentation.c.

◆ SEP_WS

#define SEP_WS   " \t\n\r"

Definition at line 136 of file ze-bsegmentation.c.

◆ SFILTER_VSM_INIT

#define SFILTER_VSM_INIT (   bc,
  nbt,
  prob 
)
Value:
do { \
int i; \
memset(bc, 0, sizeof(sfilter_vsm_T)); \
for (i = 0; i < MAX_TOK; i++) \
bc->tok[i].prob = prob; \
bc->nbt = nbt; \
bc->nb = 0; \
} while (0)
#define MAX_TOK
Definition: ze-bcheck.h:33

Definition at line 41 of file ze-bcheck.h.

◆ TOKCONF_INITIALIZER

#define TOKCONF_INITIALIZER   {"body", "body", TRUE, NULL, NULL, TRUE}

Definition at line 153 of file ze-bsegmentation.c.

◆ UT_PROB

#define UT_PROB   0.5

Definition at line 190 of file ze-bfilter.h.

◆ X_HTML_SEP

#define X_HTML_SEP   " -x- "

Definition at line 519 of file ze-bsegmentation.c.

Typedef Documentation

◆ btsm_browse_F

typedef int(* btsm_browse_F) (void *, void *)

Definition at line 172 of file ze-bfilter.h.

◆ msg_btsm_T

typedef struct msg_btsm_T msg_btsm_T

◆ sfilter_cli_T

typedef struct sfilter_cli_T sfilter_cli_T

Statistical filter CLI parameters

◆ sfilter_token_T

typedef struct sfilter_token_T sfilter_token_T

◆ sfilter_vsm_T

typedef struct sfilter_vsm_T sfilter_vsm_T

◆ smodel_db_browse_F

typedef int(* smodel_db_browse_F) (void *, void *, void *)

Definition at line 174 of file ze-bfilter.h.

Function Documentation

◆ bfilter_close()

bool bfilter_close ( )

Definition at line 651 of file ze-bfilter.c.

◆ bfilter_db_open()

bool bfilter_db_open ( )

◆ bfilter_db_reopen()

bool bfilter_db_reopen ( )

Definition at line 614 of file ze-bfilter.c.

◆ bfilter_handle_message()

bool bfilter_handle_message ( char *  id,
char *  fname,
btsm_browse_F  func,
void *  arg 
)

Definition at line 1306 of file ze-bsegmentation.c.

◆ bfilter_init()

bool bfilter_init ( char *  dbname)

Definition at line 200 of file ze-bfilter.c.

◆ bfilter_ok()

bool bfilter_ok ( )

Definition at line 676 of file ze-bfilter.c.

◆ bfilter_ptr()

bfilter_T * bfilter_ptr ( )

Definition at line 60 of file ze-bfilter.c.

◆ get_bfilter_db_crypt()

int get_bfilter_db_crypt ( )

Definition at line 600 of file ze-bfilter.c.

◆ get_bfilter_flags()

uint32_t get_bfilter_flags ( )

Definition at line 524 of file ze-bfilter.c.

◆ get_bfilter_ham_spam_ratio()

double get_bfilter_ham_spam_ratio ( )

Definition at line 538 of file ze-bfilter.c.

◆ get_bfilter_max_sizes()

size_t get_bfilter_max_sizes ( )

Definition at line 580 of file ze-bfilter.c.

◆ get_bfilter_nb_tokens()

int get_bfilter_nb_tokens ( )

Definition at line 566 of file ze-bfilter.c.

◆ get_bfilter_unknown_token_prob()

double get_bfilter_unknown_token_prob ( )

Definition at line 552 of file ze-bfilter.c.

◆ information_gain()

double information_gain ( double  nms,
double  nmh,
double  nts,
double  nth 
)

Definition at line 689 of file ze-bfilter.c.

◆ set_bfilter_db_crypt()

bool set_bfilter_db_crypt ( int  crypt)

Definition at line 507 of file ze-bfilter.c.

◆ set_bfilter_flags()

uint32_t set_bfilter_flags ( uint32_t  flags)

Definition at line 421 of file ze-bfilter.c.

◆ set_bfilter_ham_spam_ratio()

bool set_bfilter_ham_spam_ratio ( double  ratio)

Definition at line 438 of file ze-bfilter.c.

◆ set_bfilter_logit()

bool set_bfilter_logit ( bool  enable)

Definition at line 404 of file ze-bfilter.c.

◆ set_bfilter_max_sizes()

bool set_bfilter_max_sizes ( size_t  msg,
size_t  mime 
)

Definition at line 486 of file ze-bfilter.c.

◆ set_bfilter_nb_tokens()

bool set_bfilter_nb_tokens ( int  nbt)

Definition at line 470 of file ze-bfilter.c.

◆ set_bfilter_unknown_token_prob()

bool set_bfilter_unknown_token_prob ( double  prob)

Definition at line 454 of file ze-bfilter.c.

◆ set_tokconf_active()

void set_tokconf_active ( char *  tag,
bool  active 
)

Definition at line 223 of file ze-bsegmentation.c.

◆ sfilter_check_message()

double sfilter_check_message ( char *  id,
char *  fname,
sfilter_vsm_T *  bcheck 
)

Definition at line 328 of file ze-bcheck.c.

◆ sfilter_cli_handle_message()

bool sfilter_cli_handle_message ( char *  fname,
int  msgNb,
void *  arg 
)

Definition at line 252 of file ze-bcheck.c.

◆ smodel_db_check_token()

bool smodel_db_check_token ( char *  key,
sfilter_token_T *  token 
)

Definition at line 723 of file ze-bfilter.c.

◆ smodel_db_info()

void smodel_db_info ( char *  prefix,
smodel_db_browse_F  func,
void *  arg 
)

Definition at line 846 of file ze-bfilter.c.