31 static void usage(
char *);
41 static void print_histogram(
int *histo,
int dim,
double step);
43 static int print_db_info(
void *k,
void *v,
void *arg);
56 static int sfilter_token_cmp(
void *a,
void *b);
57 static void group_token_files(
int argc,
char **argv,
int msgMin,
59 static void agregate_tokens(
int argc,
char **argv,
bool multinomial);
78 #define OPTSTR_INIT "hiclga" 79 #define OPTSTR_HELP "x" 80 #define OPTSTR_INFO "x" 81 #define OPTSTR_CHECK "x" 82 #define OPTSTR_LEARN "x" 83 #define OPTSTR_GROUP "x" 84 #define OPTSTR_AGGREG "x" 86 #define OPTSTR_GENERAL "vpM:" 97 #define SET_STATE(new) \ 100 switch (opt_state) { \ 102 opt_str = OPTSTR_GENERAL OPTSTR_INIT; \ 105 opt_str = OPTSTR_GENERAL OPTSTR_HELP; \ 108 opt_str = OPTSTR_GENERAL OPTSTR_INFO; \ 111 opt_str = OPTSTR_GENERAL OPTSTR_CHECK; \ 114 opt_str = OPTSTR_GENERAL OPTSTR_LEARN; \ 117 opt_str = OPTSTR_GENERAL OPTSTR_GROUP; \ 139 bool agregate =
FALSE;
148 memset(&data, 0,
sizeof data);
160 if (access(
"ze-bayes.db", R_OK) == 0)
161 data.
dbname =
"ze-bayes.db";
172 opts =
"lchsvn:xpM:t:u:b:igdae:m:";
174 while ((c = getopt(argc, argv, opts)) != -1)
211 printf(
"Error ... \n");
268 data.
nbt = atoi(optarg);
271 data.
uprob = atof(optarg);
303 nbMsgMin = atoi(optarg);
313 printf(
"Error ... \n");
327 if ((info || data.
check) && !agregate && !group)
332 fprintf(stderr,
"Error while opening %s database\n",
353 while (argi < argc && *argv[argi] ==
'-')
360 memset(&db_info, 0,
sizeof (db_info));
364 memset(&db_info, 0,
sizeof (db_info));
367 printf(
"** TOTAL :\n");
368 printf(
" Spams mbox %6d\n", db_info.
nfs);
369 printf(
" Spams %6d\n", db_info.
ns);
370 printf(
" Hams mbox %6d\n", db_info.
nfh);
371 printf(
" Hams %6d\n", db_info.
nh);
381 crypt =
STRNULL(crypt,
"PLAIN");
383 group_token_files(argc, argv, nbMsgMin, crypt);
393 agregate_tokens(argc, argv,
FALSE);
405 fname = argv[argi++];
407 printf(
"# Checking mailbox %s\n", fname);
417 printf(
"__BEGIN__\n");
418 printf(
"FILE %ld %c %s\n", time(NULL) / 86400, (data.
spam ?
'S' :
'H'),
421 memset(hostname, 0,
sizeof (hostname));
422 if (gethostname(hostname,
sizeof (hostname)) < 0)
423 strlcpy(hostname,
"(unknown)",
sizeof (hostname));
426 if ((p =
strchr(date,
'\n')) != NULL)
428 printf(
"CRYPT %s\n",
STRNULL(crypt,
"PLAIN"));
430 (
"INFO %s type=(%s) time=(%ld) date=(%s) hostname=(%s) crypt=(%s)\n",
431 fname, (data.
spam ?
"Spam" :
"Ham"), now, date, hostname,
439 printf(
"MSGS %ld %-10s %c %6d\n", time(NULL),
"NOID",
440 (data.
spam ?
'S' :
'H'), nb);
444 print_histogram(data.
histo, 20, 0.05);
463 static char *vtags[] = {
475 print_db_info(k, v, arg)
491 if ((p =
strchr(k,
':')) != NULL)
493 printf(
"** FILE : %s\n", p);
495 for (tag = vtags; *tag != NULL; tag++)
500 snprintf(buf,
sizeof (buf),
"%s=\\([^\\)]*\\)", *tag);
507 if ((t =
strrchr(buf,
')')) != NULL)
509 if ((t =
strchr(buf,
'=')) != NULL)
518 if (sscanf(t,
"%d %d", &s, &h) == 2)
520 printf(
" %-12s Spams/Hams = %5d/%5d\n", buf, s, h);
533 printf(
" %-12s %s\n", buf, t);
546 print_histogram(histo, nmax, step)
555 for (i = 0; i <= nmax; i++)
565 strlcat(line,
"->",
sizeof (line));
566 printf(
"%3d : %6.3f %5d %s\n", i, i * step, histo[i], line);
568 printf(
" : %5d Messages\n", nm);
577 sfilter_token_cmp(
void *a,
void *b)
600 btok_browse(
void *data,
void *arg)
604 int nbmin = btok_arg->
nbmin;
606 if (tok->
nbs >= nbmin || tok->
nbh >= nbmin)
608 btok_arg->
nts += tok->
nbs;
609 btok_arg->
nth += tok->
nbh;
616 if (tok->
nbs > 0 && tok->
nbh > 0)
620 printf(
"%-40s %d %d\n", tok->
token, tok->
nbs, tok->
nbh);
628 group_token_files(argc, argv, msgMin, scli_crypt)
637 int i, nl = 0, nt = 0;
647 for (i = 0; i < argc; i++)
653 printf(
"# Grouping %s\n", fname);
655 if ((fin = fopen(fname,
"r")) != NULL)
659 while (fgets(buf,
sizeof (buf), fin) != NULL)
665 if ((p =
strchr(buf,
'\n')) != NULL)
681 p += strcspn(p,
" \t");
682 p += strspn(p,
" \t");
692 icli_crypt = file_crypt;
698 file_crypt !=
HASH_PLAIN && icli_crypt != file_crypt)
701 "Warning : skipping file %s : encoding incompatibility",
722 if ((p =
strchr(buf,
' ')) != NULL)
724 v = p + strspn(p,
" \t");
733 if (sscanf(v,
"%d %d", &ns, &nh) < 2)
740 memset(&tok, 0,
sizeof (tok));
746 if (p != NULL && *p !=
'\0')
754 snprintf(tok.
token, sizeof (tok.
token),
"TOKEN:%s", dig);
784 printf(
"# Tokens added : %d\n", nt);
792 for (plist = list; plist != NULL; plist = plist->
next)
797 printf(
"%s\n", plist->key);
802 v += strcspn(v,
" \t");
803 v += strspn(v,
" \t");
805 if (sscanf(v,
"%d %d", &ns, &nh) < 2)
815 printf(
"CRYPT:%-32s %s\n",
"TOKENS", scli_crypt);
816 printf(
"MSGS:%-32s %d %d\n",
"__TOTAL__", nts, nth);
817 printf(
"MSGS:%-32s %d %d\n",
"total-tokens", nts, nth);
818 printf(
"MSGS:%-32s %d %d\n",
"total-msgs", nts, nth);
820 memset(&arg, 0,
sizeof (arg));
825 printf(
"Count:%-32s %d %d\n",
"msgs", nts, nth);
826 printf(
"Count:%-32s %d %d\n",
"features", arg.
nfspam, arg.
nfham);
827 printf(
"Count:%-32s %d %d\n",
"Features-shared-total", arg.
nfshar,
829 printf(
"Count:%-32s %d %d\n",
"tokens", arg.
nts, arg.
nth);
835 printf(
"# Tokens browsed : %d\n", nt);
846 dtok_browse(
void *data,
void *arg)
849 int *nbmin = (
int *) arg;
851 printf(
"TOKEN:%-40s %d %d\n", tok->
token, tok->
nbs, tok->
nbh);
858 agregate_tokens(argc, argv, multinomial)
867 int i, nl = 0, nt = 0;
871 for (i = 0; i < argc; i++)
881 printf(
"# Aggregating tokens from file : %s\n", fname);
883 if ((fin = fopen(fname,
"r")) != NULL)
892 memset(crypt, 0,
sizeof (crypt));
893 memset(info, 0,
sizeof (info));
894 memset(
id, 0,
sizeof (
id));
895 while (fgets(buf,
sizeof (buf), fin) != NULL)
900 if ((p =
strchr(buf,
'\n')) != NULL)
914 p += strcspn(p,
" \t");
916 p += strspn(p,
" \t");
918 strlcpy(crypt, p,
sizeof (crypt));
928 p += strcspn(p,
" \t");
930 p += strspn(p,
" \t");
932 p += strcspn(p,
" \t");
934 p += strspn(p,
" \t");
938 strlcpy(info, p,
sizeof (info));
956 if (errno == 0 && n > 0)
959 msgs = atoi(bargv[4]);
979 memset(&tok, 0,
sizeof (tok));
1012 printf(
"__BEGIN__\n");
1026 if ((p =
strchr(buf,
'\n')) != NULL)
1029 printf(
"DATE:%-40s %ld date=(%s)\n", fname, now, buf);
1031 printf(
"MSGS:%-40s %7ld %7ld\n", fname, (
long int ) nts, (
long int ) nth);
1032 printf(
"CRYPT:%-40s %s\n", fname, crypt);
1034 printf(
"INFO:%-40s %s count=(%ld %ld)\n", fname, info, (
long int ) nts, (
long int ) nth);
1040 printf(
"__END__\n");
1047 printf(
"# Tokens browsed : %d\n", nt);
1061 char *acpy = NULL, *appname;
1064 appname = basename(acpy);
1066 printf(
"Usage : %s [ -h | -c options | -l options]\n" 1067 " -h : help message (you're reading it...)\n" 1068 " -c : check message/mailbox spamicity\n" 1070 " -b file : tokens database\n" 1071 " -t N : number of tokens\n" 1072 " -u prob : probability associated to unknown tokens\n" 1073 " -x : show histogram of scores\n" 1074 " -l : learn message/mailbox\n" 1076 " -s : message/mbox is spam\n" 1077 " General options\n" 1078 " -f flag,flag : tokenizer flags\n" 1079 " -p : don't show progress\n" 1081 " -M size : max single message size\n" 1083 " -e plain | md5 | sha1\n" 1084 " -g group .dtok -> .tok\n" 1085 " -a group .tok -> .txt\n" 1086 " -m minimum messages count\n" 1091 " Compiled on %s %s\n\n", appname,
PACKAGE, __DATE__, __TIME__);
LISTR_T * zeLinkedList_Add(LISTR_T *, char *, int, void *, size_t)
void * zeBTree_Get(ZEBT_T *, void *)
bool zeLinkedList_Clear(LISTR_T *, LISTCLEAR_F)
bool zeBTree_Init(ZEBT_T *, size_t, ZEBT_CMP_F)
#define STRNCASEEQUAL(a, b, n)
bool zeBTree_Add(ZEBT_T *, void *)
void zeLog_SetOutput(bool, bool)
#define ZE_LogMsgError(level,...)
bool zeStrRegex(char *, char *, long *, long *, bool)
bool zeBTree_Destroy(ZEBT_T *)
bool bfilter_init(char *dbname)
int zeStr2Tokens(char *, int, char **, char *)
bool sfilter_cli_handle_message(char *fname, int msgNb, void *arg)
bool str2hash2hex(int code, char *sout, char *sin, size_t szout)
long zeStr2long(char *s, int *error, long dval)
char * zeStrSet(char *, int, int)
bool zeBTree_Clear(ZEBT_T *)
char * hash_code2label(int code)
#define ZE_MessageInfo(level,...)
int zeSafeStrnCpy(char *, size_t, char *, size_t)
int main(int argc, char **argv)
#define ZE_MessageWarning(level,...)
#define ZE_LogMsgWarning(level,...)
void smodel_db_info(char *prefix, smodel_db_browse_F func, void *arg)
char * zeStr2Upper(char *)
#define STRCASEEQUAL(a, b)
bool set_bfilter_db_crypt(int crypt)
int hash_label2code(char *label)
int zeBTree_Browse(ZEBT_T *, ZEBT_BROWSE_F, void *)