/*
 * Forward declarations of file-local helpers.
 */
/*
 * cli_handle_message: handles one message file. The (fname, msgNb, arg)
 * signature lets it be passed to mbox_handle() as a callback as well as being
 * called directly; callers in this file pass a pointer to their toolbox state
 * as `arg`.
 */
35 static bool cli_handle_message(
char *fname,
int msgNb,
void *arg);
/*
 * launch_workers: opens `fname` for reading and starts `n` threads, each
 * receiving its own copy of *mstatp.
 */
56 static bool launch_workers(
int n,
char *fname,
msgtbx_T * mstatp);
/*
 * cli_toolbox: takes the toolbox state; its definition header is not visible
 * in this excerpt.
 */
58 static bool cli_toolbox(
msgtbx_T * mstatp);
/*
 * get_msg_headers: opens the named file and reads it line by line, assembling
 * header lines for the given spamchk_T.
 */
65 static bool get_msg_headers(
char *,
spamchk_T *);
/*
 * Program entry point (only selected lines are visible in this excerpt).
 * Parses options with getopt(), works out which checks are enabled, then
 * walks the remaining command-line arguments and hands each to a handler.
 */
74 #define M_INTERACTIVE 4 77 main(
int argc,
char **argv)
/* getopt() option string: a letter followed by ':' takes an argument. */
79 const char *args =
"hvl:m:t:b:p:c:rs:S";
/* Defaults: threshold 0.75, spam_judgement off. */
95 double spam_threshold = 0.75;
96 bool spam_judgement =
FALSE;
102 memset(fname, 0,
sizeof (fname));
104 while ((c = getopt(argc, argv, args)) != -1) {
111 if (optarg != NULL && strlen(optarg) > 0)
/*
 * NOTE(review): atoi() gives no error indication, so a non-numeric argument
 * cannot be told apart from a literal 0 here. Consider strtol() with an end
 * pointer check.
 */
121 level = atoi(optarg);
127 nthreads = atoi(optarg);
/*
 * NOTE(review): strtod() is called with a NULL end pointer, so trailing
 * garbage or an empty conversion goes undetected at this point.
 */
139 v = strtod(optarg, NULL);
145 spam_judgement =
TRUE;
/*
 * The comparison length is strlen("interactive"). Presumably STRNCASEEQUAL
 * compares at most that many characters, so longer arguments that merely
 * start with the keyword would match too - TODO confirm against the macro.
 */
168 if (
STRNCASEEQUAL(optarg,
"interactive", strlen(
"interactive"))) {
175 printf(
"Error ... \n");
/* Loop over any remaining arguments that still begin with '-'. */
196 while (optind < argc && *argv[optind] ==
'-')
199 if (checks != NULL && strlen(checks) > 0) {
/* Works on a strdup() copy of the list; allocation failure is checked. */
204 if ((p = strdup(checks)) == NULL) {
/*
 * Tokens are visited from last to first.
 * NOTE(review): each strncasecmp() is bounded by the length of the user's
 * token, not of the keyword. Any prefix of a keyword ("a", "or", ...) is
 * therefore accepted, and an empty token (length 0) compares equal to every
 * keyword.
 */
211 while (iargc-- > 0) {
212 if (strncasecmp(iargv[iargc],
"all", strlen(iargv[iargc])) == 0) {
216 if (strncasecmp(iargv[iargc],
"oracle", strlen(iargv[iargc])) == 0) {
220 if (strncasecmp(iargv[iargc],
"regex", strlen(iargv[iargc])) == 0) {
224 if (strncasecmp(iargv[iargc],
"urlbl", strlen(iargv[iargc])) == 0) {
228 if (strncasecmp(iargv[iargc],
"bayes", strlen(iargv[iargc])) == 0) {
237 fprintf(stderr,
"No checks defined...\n");
247 size_t msgSize, partSize;
/* Pick the Bayes database name via STRNULL from bayesdb and dbname. */
251 dbname =
STRNULL(bayesdb, dbname);
253 memset(path, 0,
sizeof (path));
/*
 * Sanity limits on the configured values; the action taken when a limit is
 * hit is not visible in this excerpt.
 */
265 if (partSize < 10000)
271 if (rhs < 0.1 || rhs > 10.)
/* Start from zeroed toolbox state. */
279 memset(&mstat, 0,
sizeof (mstat));
/*
 * Four loops over the non-option arguments. The conditions selecting between
 * them are not visible here. Each mbox argument goes through mbox_handle()
 * with cli_handle_message as the per-message callback.
 */
289 for (argi = optind; argi < argc; argi++)
290 nb +=
mbox_handle(argv[argi], cli_handle_message, &mstat);
293 for (argi = optind; argi < argc; argi++)
/*
 * The running count nb doubles as the message number, and the bool result of
 * cli_handle_message() is added to it.
 */
297 for (argi = optind; argi < argc; argi++)
298 nb += cli_handle_message(argv[argi], nb, &mstat);
/* NOTE(review): the bool result of launch_workers() is discarded here. */
301 for (argi = optind; argi < argc; argi++)
302 launch_workers(nthreads, argv[argi], &mstat);
/* The leading `0 &&` makes this condition constant false: a disabled branch. */
311 if (0 &&
GET_BIT(docheck, 0)) {
316 for (i = 0; i < 32; i++)
323 for (i = 0; i < 32; i++) {
/*
 * Fragment of a header-splitting routine. Its name is not visible in this
 * excerpt (presumably add_header - TODO confirm).
 */
/* Guard against a NULL or empty header string. */
347 if ((h == NULL) || (strlen(h) == 0))
/*
 * v points at the first ':' in h, or at the terminating NUL when h has no
 * colon.
 */
351 v = h + strcspn(h,
":");
/* Skip blanks and tabs from the current position. */
354 v += strspn(v,
" \t");
/*
 * get_msg_headers: reads the file `fname` line by line and assembles header
 * lines in the local `header` buffer. Only selected lines are visible here;
 * the definition uses an old-style parameter list without types.
 */
368 get_msg_headers(fname, spam)
/* Guard against a NULL or empty file name. */
377 if ((fname == NULL) || (strlen(fname) == 0))
380 if ((fin = fopen(fname,
"r")) == NULL) {
385 memset(header, 0,
sizeof (header));
386 while (fgets(line,
sizeof (line), fin) != NULL) {
/* A line made only of CR/LF characters (an empty line). */
389 if (strspn(line,
"\r\n") == strlen(line))
/* A "From " line seen while nl is still 0 (presumably an mbox separator). */
392 if ((nl == 0) && (strncasecmp(
"From ", line, strlen(
"From ")) == 0))
/* s skips any leading CR/LF characters of the line. */
397 s = line + strspn(line,
"\r\n");
/* No leading blank or tab: the buffer is reset and the line copied in. */
398 if ((*s !=
' ') && (*s !=
'\t')) {
400 memset(header, 0,
sizeof (header));
401 snprintf(header,
sizeof (header),
"%s", s);
/*
 * Append at the current end of `header` (apparently the continuation-line
 * path).
 * NOTE(review): the strncpy() bound is the source length, not the space left
 * in `header`, so nothing here prevents writing past the buffer. Because the
 * bound never exceeds strlen(line), no terminating NUL is written either;
 * termination relies on the buffer having been zeroed. The copy also starts
 * at `line` while the count comes from `s`, and the two differ when leading
 * CR/LF characters were skipped.
 */
403 char *p = header + strlen(header);
405 strncpy(p, line, strlen(s));
/*
 * free_msg_headers: definition header only (old-style parameter list); the
 * body is not visible in this excerpt. cli_handle_message() calls it on its
 * local spam structure before finishing.
 */
423 free_msg_headers(spam)
/*
 * cli_handle_message: handles the single message stored in `fname`.
 * Visible steps: build an id from msgNb, zero the working structures, load
 * the headers, look for the subject and from headers, record the score in
 * the toolbox state and release the headers. Declared as returning bool.
 */
437 cli_handle_message(fname, msgNb, arg)
/*
 * NOTE(review): both pointers refer to string literals through non-const
 * char *; the pointed-to text must never be written.
 */
442 char *ip =
"0.0.0.0";
443 char *
id =
"00000000.000";
/* 0x20000 = 131072 bytes. */
449 size_t maxsize = 0x20000;
/* Id built from the message number: 8 upper-case hex digits plus ".000". */
459 snprintf(bid,
sizeof (bid),
"%08X.000", msgNb);
463 memset(&spam, 0,
sizeof (spam));
464 memset(&flags, 0,
sizeof (flags));
465 memset(&rScores, 0,
sizeof (rScores));
486 memset(&bcheck, 0,
sizeof (bcheck));
/* Only a non-negative Bayes score passes this test. */
493 if (rScores.
bayes >= 0.)
499 if (!get_msg_headers(fname, &spam)) {
/* Walk the linked list of headers, matching attribute names without case. */
512 for (h = spam.
hdrs; h != NULL; h = h->
next) {
515 if (strcasecmp(h->
attr,
"subject") == 0)
517 if (strcasecmp(h->
attr,
"from") == 0)
538 mstat->
score = score;
/*
 * NOTE(review): the size is the literal 80 rather than sizeof; this assumes
 * sout holds at least 80 bytes - TODO confirm against its declaration.
 */
547 snprintf(sout, 80,
"%s", h->
value);
551 snprintf(sout, 80,
"%s", h->
value);
/*
 * Starts at index 2 of the stored argument list. strlcat() is a BSD
 * extension, not ISO C.
 */
566 for (i = 2; i < mstat->
argc; i++) {
567 strlcat(buf,
" ",
sizeof (buf));
/* The Bayes score is passed on both raw and through logit(). */
570 lscore = rScores.
bayes;
572 fname, buf,
logit(lscore), lscore,
584 free_msg_headers(&spam);
/*
 * get_next_message_file: reads the next line from `fin` into `buf` (at most
 * sz bytes, per fgets) and locates its newline. Only selected lines are
 * visible; the definition uses an old-style parameter list.
 */
602 get_next_message_file(fin, buf, sz)
/*
 * One statically initialised mutex shared by all callers. Presumably it
 * guards the read below - the lock/unlock calls are not visible here.
 */
607 static pthread_mutex_t
mutex = PTHREAD_MUTEX_INITIALIZER;
617 if (fgets(buf, sz, fin) != NULL) {
/* Find the newline kept by fgets() (presumably to strip it). */
620 if ((p =
strchr(buf,
'\n')) != NULL)
/*
 * worker_check_file: thread start routine handed to pthread_create() by
 * launch_workers(). Pulls file names from the worker's input stream and
 * passes each to cli_handle_message() together with the worker's own copy of
 * the toolbox state.
 */
630 worker_check_file(arg)
637 while (get_next_message_file(worker->
fin, fname, sizeof (fname))) {
/* nb serves as the message number and accumulates the bool results. */
641 nb += cli_handle_message(fname, nb, &worker->
mstat);
/*
 * launch_workers: opens `fname` for reading, starts `n` threads running
 * worker_check_file() and then joins them. Only selected lines are visible;
 * the definition uses an old-style parameter list.
 */
648 launch_workers(n, fname, mstatp)
/*
 * NOTE(review): `worker` is indexed up to n-1 below; nothing visible here
 * checks n against the array's capacity - TODO confirm.
 */
657 memset(&worker, 0,
sizeof (worker));
659 if ((fin = fopen(fname,
"r")) == NULL) {
664 for (i = 0; i < n; i++) {
668 worker[i].
fname = fname;
/* (pthread_t) -1 is used as a "no thread" marker. */
670 worker[i].
tid = (pthread_t) - 1;
/* Each worker gets its own copy of the caller's state. */
672 worker[i].
mstat = *mstatp;
674 r = pthread_create(&worker[i].tid, NULL, worker_check_file, &worker[i]);
676 worker[i].
tid = (pthread_t) - 1;
681 for (i = 0; i < n; i++) {
/*
 * NOTE(review): pthread_t is an opaque type. Where it is an unsigned integer
 * this test is never true, so the marker above is not detected; where it is
 * a pointer or struct the comparison is not meaningful. A separate "started"
 * flag would be portable.
 */
684 if (worker[i].tid < 0)
689 r = pthread_join(worker[i].tid, NULL);
690 worker[i].
tid = (pthread_t) - 1;
/*
 * Fragment of the interactive command loop. The definition header is not
 * visible in this excerpt (presumably cli_toolbox - TODO confirm). Commands
 * are read from stdin one line at a time.
 */
712 while (fgets(line,
sizeof (line), stdin) != NULL) {
/* Switch the spam_judgement flag off or on. */
743 mstat->spam_judgement =
FALSE;
745 mstat->spam_judgement =
TRUE;
/*
 * NOTE(review): strtod() is called with a NULL end pointer, so a malformed
 * number cannot be detected at this point.
 */
760 v = strtod(argv[1], NULL);
762 mstat->spam_threshold = v;
/* Values outside 0..15 are rejected. */
779 if (l < 0 || l > 15) {
/* Handle the file named by the first command argument. */
810 nb += cli_handle_message(argv[1], nb, mstat);
/*
 * Help text (the enclosing function's name is not visible in this excerpt).
 * NOTE(review): the text advertises -N for the number of threads, but the
 * option string passed to getopt() in main ("hvl:m:t:b:p:c:rs:S") contains
 * no 'N' - confirm which letter actually sets the thread count.
 */
825 printf(
"Usage : ze-message-tbx options\n" 828 " configuration file (default : %s)\n" 829 " -v verbose (increase level)\n" 830 " -r log to file matched regular expressions\n" 831 " -s score threshold for spam classification\n" 832 " -S spam \"ground truth\"\n" 833 " -N number of threads\n" 834 " -b bayes database path\n" 836 " where checks is a list of comma separated checks :\n" 837 " oracle, regex, urlbl, bayes or all\n" 840 " where file type tells how messages are arranged inside args\n" 841 " mbox - each argument is a mailbox file with many messages\n" 842 " file - each argument is a file with one message\n" 843 " maildir - each argument is a directory with files inside\n" 844 " dir - the same as maildir\n" 845 " list - each argument is a file with a list of file names\n" 846 " each file contains a single message\n",
/* Build stamp taken from the compiler's __DATE__ and __TIME__ macros. */
850 printf(
" Compiled on %s %s\n\n", __DATE__, __TIME__);
int check_regex(char *, char *, char *, int)
#define SHOW_CURSOR(zero)
double sfilter_check_message(char *id, char *fname, sfilter_vsm_T *bcheck)
bool set_bfilter_max_sizes(size_t msg, size_t mime)
#define CF_BAYES_MAX_PART_SIZE
#define MUTEX_UNLOCK(mutex)
bool configure_msg_eval_function(char *val)
#define STRNCASEEQUAL(a, b, n)
#define MUTEX_LOCK(mutex)
#define CF_BAYES_NB_TOKENS
void zeLog_SetOutput(bool, bool)
#define ADJUST_FILENAME(path, fname, cfdir, defval)
#define CF_BAYES_MAX_MESSAGE_SIZE
char * zeStrChomp(char *)
bool bfilter_init(char *dbname)
int zeStr2Tokens(char *, int, char **, char *)
int main(int argc, char **argv)
void init_default_file_extensions()
bool fill_msg_scale(scores_scale_T *scale)
double oracle_get_score(int, int)
bool create_msg_score_header(char *buf, size_t size, char *id, char *hostname, msg_scores_T *scores)
#define ZE_MessageInfo(level,...)
bool set_bfilter_ham_spam_ratio(double ratio)
#define ZE_LogSysError(...)
char * cf_get_str(int id)
char * oracle_get_label(int, int)
int configure(char *, char *, bool)
int add_header(char *, spamchk_T *)
#define ORACLE_TYPE_PLAIN
#define STRCASEEQUAL(a, b)
double compute_msg_score(msg_scores_T *scores)
#define CF_SPAM_REGEX_MAX_MSG_SIZE
int scan_body_contents(char *, char *, char *, size_t, spamchk_T *, msg_flags_T *, msg_scores_T *)
#define CF_BAYES_HAM_SPAM_RATIO