ze-filter  (ze-filter-0.8.0-develop-180218)
ze-bayes-tbx.c
Go to the documentation of this file.
1 /*
2  *
3  * ze-filter - Mail Server Filter for sendmail
4  *
5  * Copyright (c) 2001-2018 - Jose-Marcio Martins da Cruz
6  *
7  * Auteur : Jose Marcio Martins da Cruz
8  * jose.marcio.mc@gmail.org
9  *
10  * Historique :
11  * Creation : december 2005
12  *
13  * This program is free software, but with restricted license :
14  *
15  *
16  * This program is distributed in the hope that it will be useful,
17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
19  *
20  * More details about ze-filter license can be found at ze-filter
21  * web site : http://foss.jose-marcio.org
22  */
23 
24 
25 
26 #include <ze-sys.h>
27 #include <libze.h>
28 #include <libml.h>
29 #include "ze-filter.h"
30 
/* Forward declaration: prints the command line help (defined at the end of the file). */
31 static void usage(char *);
32 
33 /*
34 ** MY_BDATA
35 */
36 
37 
38 /* *******************/
39 
40 
/* Forward declarations of the reporting helpers used by main(). */
41 static void print_histogram(int *histo, int dim, double step);
42 
43 static int print_db_info(void *k, void *v, void *arg);
44 
/*
 * Totals accumulated by print_db_info() while "info:" records are browsed
 * and printed by main() in its "** TOTAL :" summary.
 */
45 typedef struct
46 {
 /* sum of the first value of every "count=(s h)" tag (spam messages) */
47  int ns;
 /* sum of the second value of every "count=(s h)" tag (ham messages) */
48  int nh;
 /* number of records whose "type" tag equals "spam" (printed as "Spams mbox") */
49  int nfs;
 /* number of records with any other "type" value (printed as "Hams mbox") */
50  int nfh;
 /* not referenced anywhere in the visible part of this file */
51  int nm;
52 } db_info_T;
53 
54 
55 
/* Forward declarations of the token grouping / aggregation helpers. */
56 static int sfilter_token_cmp(void *a, void *b);
57 static void group_token_files(int argc, char **argv, int msgMin,
58  char *crypt);
59 static void agregate_tokens(int argc, char **argv, bool multinomial);
60 
61 /* ****************************************************************************
62  * *
63  * *
64  **************************************************************************** */
65 /*
66 ** General options
67 **
68 ** - h : help
69 **
70 ** - l : learn
71 ** - s : spam
72 ** - h : ham
73 **
74 ** - c : check
75 **
76 */
77 
/*
 * getopt() option strings, one per parser state.  The string handed to
 * getopt() is always OPTSTR_GENERAL followed by the state specific part.
 */
#define OPTSTR_INIT "hiclga"
#define OPTSTR_HELP "x"
#define OPTSTR_INFO "x"
#define OPTSTR_CHECK "x"
#define OPTSTR_LEARN "x"
#define OPTSTR_GROUP "x"
#define OPTSTR_AGGREG "x"

#define OPTSTR_GENERAL "vpM:"


/*
 * Parser states.  Every state needs its own value: OPT_INFO used to share
 * the value 5 with OPT_AGGREG, which made the two states indistinguishable
 * and made it impossible to give OPT_AGGREG its own "case" in SET_STATE.
 */
#define OPT_INIT 0
#define OPT_HELP 1
#define OPT_LEARN 2
#define OPT_CHECK 3
#define OPT_GROUP 4
#define OPT_AGGREG 5
#define OPT_INFO 6

/*
 * SET_STATE(new) - switch the option parser to state `new`.
 *
 * Expects two variables in the calling scope: `opt_state` (int) and
 * `opt_str` (char *).  `opt_state` always receives `new`; `opt_str` is
 * updated to the matching option string, and is left untouched when
 * `new` is not one of the OPT_xxx values.
 */
#define SET_STATE(new) \
  do { \
    opt_state = (new); \
    switch (opt_state) { \
      case OPT_INIT: \
        opt_str = OPTSTR_GENERAL OPTSTR_INIT; \
        break; \
      case OPT_HELP: \
        opt_str = OPTSTR_GENERAL OPTSTR_HELP; \
        break; \
      case OPT_INFO: \
        opt_str = OPTSTR_GENERAL OPTSTR_INFO; \
        break; \
      case OPT_CHECK: \
        opt_str = OPTSTR_GENERAL OPTSTR_CHECK; \
        break; \
      case OPT_LEARN: \
        opt_str = OPTSTR_GENERAL OPTSTR_LEARN; \
        break; \
      case OPT_GROUP: \
        opt_str = OPTSTR_GENERAL OPTSTR_GROUP; \
        break; \
      case OPT_AGGREG: \
        opt_str = OPTSTR_GENERAL OPTSTR_AGGREG; \
        break; \
      default: \
        break; \
    } \
  } while (0)
121 
122 
123 
/*
 * main - command line entry point of the bayes toolbox.
 *
 * Visible flow: fill `data` with defaults (check mode is the default),
 * parse the options with getopt(), call bfilter_init(), then run exactly
 * one of the following, tested in this order:
 *   - info (-i)      : browse the database records and print totals, exit(0)
 *   - group (-g)     : group_token_files() on the remaining arguments, exit(0)
 *   - aggregate (-a) : agregate_tokens() on the remaining arguments, exit(0)
 *   - learn / check  : mbox_handle() on every remaining argument
 *
 * Returns 1 when bfilter_init() fails and 0 after the mailbox loop; calls
 * usage() and exit(1) when no mailbox argument is left.
 *
 * NOTE(review): listing lines 163, 167, 185, 191, 197, 203, 320, 340, 345,
 * 347 and 349 are absent from this view of the file, so some statements of
 * this function are not shown here.
 */
124 int
125 main(argc, argv)
126  int argc;
127  char **argv;
128 {
 /* this outer `fname` is shadowed by the one declared in the mailbox loop */
129  char *fname;
130  sfilter_cli_T data;
131  int argi = 0;
 /* running total of the values returned by mbox_handle() */
132  int nb = 0;
133 
134  char *opts = "";
135 
136  bool info = FALSE;
137 
138  bool group = FALSE;
139  bool agregate = FALSE;
140 
141  char *crypt = NULL;
142 
143  int nbMsgMin = 4;
144 
 /* only the disabled (#if 0) parser below refers to these two */
145  int opt_state = OPT_INIT;
146  char *opt_str = OPTSTR_INIT;
147 
 /* defaults, overridden by the command line options below */
148  memset(&data, 0, sizeof data);
149  data.bcheck.nbt = 21;
150  data.spam = FALSE;
151  data.maxSize = 100000;
152  data.check = TRUE;
153  data.verbose = FALSE;
154  data.histogram = FALSE;
155  data.progress = TRUE;
156  data.dbname = ZE_CDBDIR "/ze-bayes.db";
157  data.nbt = 21;
158  data.uprob = 0.5;
159 
 /* a readable ze-bayes.db in the current directory replaces the default path */
160  if (access("ze-bayes.db", R_OK) == 0)
161  data.dbname = "ze-bayes.db";
162 
164 
165  ze_logLevel = 5;
166 
168  {
169  int c;
170  int optlevel = 0;
171 
 /* NOTE(review): 'n:' and 'd' are accepted here but have no case in the
  * active switch, so they end up in its default branch */
172  opts = "lchsvn:xpM:t:u:b:igdae:m:";
173 
174  while ((c = getopt(argc, argv, opts)) != -1)
175  {
176 
 /* disabled state-machine parser; the #else branch is the one compiled */
177 #if 0
178  /* First choice - what to do */
179  if (opt_state == OPT_INIT)
180  {
181  switch (c)
182  {
183  /* help */
184  case 'h':
186  usage(argv[0]);
187  exit(0);
188  break;
189  /* get info */
190  case 'i':
192  info = TRUE;
193  break;
194 
195  /* Check options */
196  case 'c':
198  data.check = TRUE;
199  break;
200 
201  /* learn options */
202  case 'l':
204  data.check = FALSE;
205  break;
206 
207  /* default */
208  default:
209  opt_state = OPT_INIT;
210  usage(argv[0]);
211  printf("Error ... \n");
212  exit(0);
213  }
214 
215  continue;
216  }
217 
218  /* Learning sub-options */
219  if (opt_state == OPT_LEARN)
220  {
221 
222  continue;
223  }
224 
225  /* Message checking sub-options */
226  if (opt_state == OPT_CHECK)
227  {
228 
229  }
230 
231  /* Message checking sub-options */
232  if (opt_state == OPT_INFO)
233  {
234 
235  }
236 
237  /* Message checking sub-options */
238  if (opt_state == OPT_GROUP)
239  {
240 
241  }
242 #else
243 
244  switch (c)
245  {
246  /* help */
247  case 'h':
248  usage(argv[0]);
249  exit(0);
250  break;
251 
252  /* get info */
253  case 'i':
254  info = TRUE;
255  break;
256 
257  /* Check options */
258  case 'c':
259  data.check = TRUE;
260  break;
261  case 'x':
262  data.histogram = TRUE;
263  break;
264  case 'b':
265  data.dbname = optarg;
266  break;
267  case 't':
268  data.nbt = atoi(optarg);
269  break;
270  case 'u':
271  data.uprob = atof(optarg);
272  break;
273 
274  /* learn options */
275  case 'e':
276  crypt = optarg;
277  break;
278 
279  case 'l':
280  data.check = FALSE;
281  break;
282 
283  case 's':
284  data.spam = TRUE;
285  break;
286 
287  /* general options */
288  case 'v':
289  data.verbose = TRUE;
290  ze_logLevel++;
291  break;
 /* -p toggles the progress display, which is enabled by default */
292  case 'p':
293  data.progress = !data.progress;
294  break;
295  case 'M':
296  data.maxSize = atoi(optarg);
297  break;
298 
299  case 'g':
300  group = TRUE;
301  break;
302  case 'm':
303  nbMsgMin = atoi(optarg);
304  break;
305 
306  case 'a':
307  agregate = TRUE;
308  break;
309 
310  /* default */
 /* NOTE(review): an unknown option still terminates with exit status 0 */
311  default:
312  usage(argv[0]);
313  printf("Error ... \n");
314  exit(0);
315  }
316 #endif
317  }
318  }
319 
321 
322  argi = optind;
323 
324  {
325  char *dbname = NULL;
326 
 /* the database name is only passed for info/check mode without -a and -g;
  * otherwise bfilter_init() receives NULL */
327  if ((info || data.check) && !agregate && !group)
328  dbname = data.dbname;
329 
330  if (!bfilter_init(dbname))
331  {
332  fprintf(stderr, "Error while opening %s database\n",
333  STRNULL(dbname, "null"));
334  return 1;
335  }
336  }
337 
 /* learn mode: handling of the -e argument */
338  if (!data.check)
339  {
341  if (crypt != NULL)
342  {
 /* upper-cases the -e argument in place */
343  zeStr2Upper(crypt);
 /* NOTE(review): the statements controlled by the three tests below
  * (listing lines 345, 347 and 349) are missing from this view */
344  if (STRCASEEQUAL(crypt, "PLAIN"))
346  if (STRCASEEQUAL(crypt, "MD5"))
348  if (STRCASEEQUAL(crypt, "SHA1"))
350  }
351  }
352 
 /* skip any leading arguments that still begin with '-' */
353  while (argi < argc && *argv[argi] == '-')
354  argi++;
355 
356  if (info)
357  {
358  db_info_T db_info = { 0, 0, 0 };
359 
 /* first pass over the "msgs:" records; the counters are reset before the
  * "info:" pass, so the totals printed below come from that second pass */
360  memset(&db_info, 0, sizeof (db_info));
361  (void) smodel_db_info("msgs:", print_db_info, &db_info);
362  printf("\n");
363 
364  memset(&db_info, 0, sizeof (db_info));
365  (void) smodel_db_info("info:", print_db_info, &db_info);
366  printf("\n");
367  printf("** TOTAL :\n");
368  printf(" Spams mbox %6d\n", db_info.nfs);
369  printf(" Spams %6d\n", db_info.ns);
370  printf(" Hams mbox %6d\n", db_info.nfh);
371  printf(" Hams %6d\n", db_info.nh);
372  printf("\n");
373  exit(0);
374  }
375 
376  if (group)
377  {
 /* hand over only the arguments left after the options */
378  argc -= argi;
379  argv += argi;
380 
381  crypt = STRNULL(crypt, "PLAIN");
382 
383  group_token_files(argc, argv, nbMsgMin, crypt);
384 
385  exit(0);
386  }
387 
388  if (agregate)
389  {
390  argc -= argi;
391  argv += argi;
392 
 /* the multinomial argument is always FALSE here */
393  agregate_tokens(argc, argv, FALSE);
394 
395  exit(0);
396  }
397 
398  /* learning or checking */
399  if (argi < argc)
400  {
401  char *fname = NULL;
402 
403  while (argi < argc)
404  {
405  fname = argv[argi++];
406 
 /* this banner is printed in learn mode as well */
407  printf("# Checking mailbox %s\n", fname);
408 
409  memset(data.histo, 0, sizeof (data.histo));
 /* learn mode: emit the record header before the mailbox is processed */
410  if (!data.check)
411  {
412  char hostname[256];
413  char date[256];
414  time_t now;
415  char *p;
416 
417  printf("__BEGIN__\n");
 /* first field is the current time divided by 86400 (seconds per day) */
418  printf("FILE %ld %c %s\n", time(NULL) / 86400, (data.spam ? 'S' : 'H'),
419  fname);
420 
421  memset(hostname, 0, sizeof (hostname));
422  if (gethostname(hostname, sizeof (hostname)) < 0)
423  strlcpy(hostname, "(unknown)", sizeof (hostname));
424  now = time(NULL);
425  CTIME_R(&now, date);
 /* cut the date string at its first newline */
426  if ((p = strchr(date, '\n')) != NULL)
427  *p = '\0';
428  printf("CRYPT %s\n", STRNULL(crypt, "PLAIN"));
429  printf
430  ("INFO %s type=(%s) time=(%ld) date=(%s) hostname=(%s) crypt=(%s)\n",
431  fname, (data.spam ? "Spam" : "Ham"), now, date, hostname,
432  STRNULL(crypt, "PLAIN"));
433  }
434 
 /* `nb` is never reset, so it accumulates over all mailboxes */
435  nb += mbox_handle(fname, sfilter_cli_handle_message, &data);
436 
437  if (!data.check)
438  {
 /* NOTE(review): the count printed here is the running total `nb`,
  * not the count of this mailbox alone */
439  printf("MSGS %ld %-10s %c %6d\n", time(NULL), "NOID",
440  (data.spam ? 'S' : 'H'), nb);
441  printf("__END__\n");
442  }
 /* 20 and 0.05 are the histogram index limit and the score step per row */
443  if (data.check && data.histogram)
444  print_histogram(data.histo, 20, 0.05);
445  }
446  } else
447  {
448  usage(argv[0]);
449  exit(1);
450  }
451 
 /* bfilter_close() is only called in check mode */
452  if (data.check)
453  bfilter_close();
454 
455  return 0;
456 }
457 
458 /* ****************************************************************************
459  * *
460  * *
461  **************************************************************************** */
462 
463 static char *vtags[] = {
464  "type",
465  "time",
466  "date",
467  "hostname",
468  "count",
469  "crypt",
470  NULL
471 };
472 
473 
474 static int
475 print_db_info(k, v, arg)
476  void *k;
477  void *v;
478  void *arg;
479 {
480  db_info_T *info = (db_info_T *) arg;
481 
482  ASSERT(k != NULL);
483  ASSERT(v != NULL);
484 
485  /* printf(" ** %-32s : %s\n", k, v); */
486  if (STRNCASEEQUAL(k, "info:", strlen("info:")))
487  {
488  char *p = NULL;
489  char **tag;
490 
491  if ((p = strchr(k, ':')) != NULL)
492  *p++ = '\0';
493  printf("** FILE : %s\n", p);
494 
495  for (tag = vtags; *tag != NULL; tag++)
496  {
497  long pf, pi;
498  char buf[256];
499 
500  snprintf(buf, sizeof (buf), "%s=\\([^\\)]*\\)", *tag);
501  if (zeStrRegex(v, buf, &pi, &pf, TRUE))
502  {
503  char *s, *t;
504 
505  zeSafeStrnCpy(buf, sizeof (buf), (char *) v + pi, pf - pi);
506 
507  if ((t = strrchr(buf, ')')) != NULL)
508  *t = '\0';
509  if ((t = strchr(buf, '=')) != NULL)
510  *t++ = '\0';
511  if (*t == '(')
512  t++;
513 
514  if (STRCASEEQUAL(*tag, "count"))
515  {
516  int s, h;
517 
518  if (sscanf(t, "%d %d", &s, &h) == 2)
519  {
520  printf(" %-12s Spams/Hams = %5d/%5d\n", buf, s, h);
521  info->ns += s;
522  info->nh += h;
523  }
524  continue;
525  }
526  if (STRCASEEQUAL(*tag, "type"))
527  {
528  if (STRCASEEQUAL(t, "spam"))
529  info->nfs++;
530  else
531  info->nfh++;
532  }
533  printf(" %-12s %s\n", buf, t);
534  }
535  }
536  }
537 
538  return 0;
539 }
540 
541 /* ****************************************************************************
542  * *
543  * *
544  **************************************************************************** */
/*
 * print_histogram - print a histogram as rows of '*' characters.
 *
 *   histo : counters; entries 0 .. nmax (inclusive) are read
 *   nmax  : index of the last entry to print
 *   step  : value width of one entry; row i is labelled i * step
 *
 * Each row shows the index, its label, the count, and a bar of one '*'
 * per unit.  Bars are limited to 80 characters; a longer bar is marked
 * with a trailing "->".  The last line prints the sum of all counts.
 */
static void
print_histogram(int *histo, int nmax, double step)
{
  enum { BAR_MAX = 80 };
  char line[128];
  int i, total = 0;

  for (i = 0; i <= nmax; i++)
  {
    int bar = histo[i];
    int clipped = 0;

    total += histo[i];

    /* remember the overflow before clamping: testing the clamped value
     * afterwards could never be true, so the marker was never printed */
    if (bar > BAR_MAX)
    {
      bar = BAR_MAX;
      clipped = 1;
    }
    /* a negative counter must not become a (huge) fill length */
    if (bar < 0)
      bar = 0;

    memset(line, '*', (size_t) bar);
    line[bar] = '\0';
    if (clipped)
      memcpy(line + bar, "->", sizeof ("->"));

    printf("%3d : %6.3f %5d %s\n", i, i * step, histo[i], line);
  }
  printf(" : %5d Messages\n", total);
}
570 
571 /* ****************************************************************************
572  * *
573  * *
574  **************************************************************************** */
575 
576 static int
577 sfilter_token_cmp(void *a, void *b)
578 {
579  sfilter_token_T *ta = (sfilter_token_T *) a;
580  sfilter_token_T *tb = (sfilter_token_T *) b;
581 
582  return strcmp(ta->token, tb->token);
583 }
584 
/*
 * Argument block shared between group_token_files() and btok_browse():
 * `nbmin` is the input threshold, the other fields are counters updated
 * for every token that btok_browse() prints.
 */
585 typedef struct
586 {
 /* a token is kept when its spam or its ham count is >= nbmin */
587  int nbmin;
588 
 /* sum of the spam counts (nbs) of the kept tokens */
589  int nts;
 /* sum of the ham counts (nbh) of the kept tokens */
590  int nth;
591 
 /* number of kept tokens with a spam count > 0 */
592  int nfspam;
 /* number of kept tokens with a ham count > 0 */
593  int nfham;
594 
 /* number of kept tokens with both counts > 0 */
595  int nfshar;
 /* number of kept tokens */
596  int nftot;
597 } btok_arg_T;
598 
599 static int
600 btok_browse(void *data, void *arg)
601 {
602  sfilter_token_T *tok = (sfilter_token_T *) data;
603  btok_arg_T *btok_arg = (btok_arg_T *) arg;
604  int nbmin = btok_arg->nbmin;
605 
606  if (tok->nbs >= nbmin || tok->nbh >= nbmin)
607  {
608  btok_arg->nts += tok->nbs;
609  btok_arg->nth += tok->nbh;
610 
611  if (tok->nbs > 0)
612  btok_arg->nfspam++;
613  if (tok->nbh > 0)
614  btok_arg->nfham++;
615 
616  if (tok->nbs > 0 && tok->nbh > 0)
617  btok_arg->nfshar++;
618  btok_arg->nftot++;
619 
620  printf("%-40s %d %d\n", tok->token, tok->nbs, tok->nbh);
621  return 1;
622  }
623  return 0;
624 }
625 
626 
/*
 * group_token_files - merge the token files named in argv[0 .. argc-1].
 *
 *   argc, argv : files to read
 *   msgMin     : threshold handed to btok_browse() through btok_arg_T.nbmin
 *   scli_crypt : token encoding label requested on the command line
 *
 * Each file is read line by line.  "msgs:" lines matching the .dtok pattern,
 * "crypt:" lines and "info:" lines are collected in a list; "token:" lines
 * are merged into a tree keyed by the token string, adding up the two counts
 * of identical tokens.  Afterwards the collected lines, the message totals
 * and every token accepted by btok_browse() are printed.
 *
 * The function never returns to its caller: every path ends in exit(0).
 */
627 static void
628 group_token_files(argc, argv, msgMin, scli_crypt)
629  int argc;
630  char **argv;
631  int msgMin;
632  char *scli_crypt;
633 {
634  ZEBT_T bt = JBT_INITIALIZER;
635  LISTR_T *list = NULL, *plist;
636  char *fname = NULL;
 /* `nl` counts the lines read but is never used afterwards */
637  int i, nl = 0, nt = 0;
638  int icli_crypt = HASH_PLAIN;
639  int file_crypt = HASH_PLAIN;
640 
641  if (!zeBTree_Init(&bt, sizeof (sfilter_token_T), sfilter_token_cmp))
642  goto fin;
643 
 /* convert the label to its code and back again */
644  icli_crypt = hash_label2code(scli_crypt);
645  scli_crypt = hash_code2label(icli_crypt);
646 
647  for (i = 0; i < argc; i++)
648  {
649  FILE *fin = NULL;
650 
651  fname = argv[i];
652 
653  printf("# Grouping %s\n", fname);
654 
 /* NOTE(review): a file that cannot be opened is skipped without any message */
655  if ((fin = fopen(fname, "r")) != NULL)
656  {
657  char buf[1024];
658 
659  while (fgets(buf, sizeof (buf), fin) != NULL)
660  {
661  char *p;
662  int tok_crypt = HASH_PLAIN;
663 
664  nl++;
 /* cut the line at its newline */
665  if ((p = strchr(buf, '\n')) != NULL)
666  *p = '\0';
667 
668  if (STRNCASEEQUAL(buf, "msgs:", strlen("msgs:")))
669  {
 /* only "msgs:" lines whose key ends in ".dtok" are collected */
670  if (zeStrRegex(buf, "^msgs:[^ ]*[.]dtok[ ]+", NULL, NULL, TRUE))
671  list = zeLinkedList_Add(list, buf, 1, NULL, 0);
672  }
673 
674  if (STRNCASEEQUAL(buf, "crypt:", strlen("crypt:")))
675  {
676  char *p;
677 
678  list = zeLinkedList_Add(list, buf, 1, NULL, 0);
679 
 /* the label is the second blank-separated field of the line */
680  p = buf;
681  p += strcspn(p, " \t");
682  p += strspn(p, " \t");
683 
684  file_crypt = hash_label2code(p);
685 
 /* only the first file may replace a PLAIN command line encoding */
686  if (i == 0)
687  {
688  if (icli_crypt == HASH_PLAIN && file_crypt != HASH_PLAIN)
689  {
 /* NOTE(review): the format contains a "%d" conversion but only two
  * arguments follow it */
690  ZE_MessageInfo(8, "# Changing global crypt option %s -> %s %d\n",
691  scli_crypt, hash_code2label(file_crypt));
692  icli_crypt = file_crypt;
693  scli_crypt = hash_code2label(icli_crypt);
694  }
695  }
696 
 /* two different non-PLAIN encodings: stop reading this file */
697  if (icli_crypt != HASH_PLAIN &&
698  file_crypt != HASH_PLAIN && icli_crypt != file_crypt)
699  {
 /* NOTE(review): listing line 700, which opens this call, is missing
  * from this view of the file */
701  "Warning : skipping file %s : encoding incompatibility",
702  fname);
703  break;
704  }
705  }
706 
707  if (STRNCASEEQUAL(buf, "info:", strlen("info:")))
708  {
709  list = zeLinkedList_Add(list, buf, 1, NULL, 0);
710  }
711 
712  if (STRNCASEEQUAL(buf, "token:", strlen("token:")))
713  {
714  sfilter_token_T tok, *t;
715  char *k, *v, *p;
716  int ns, nh;
717 
718  k = buf;
719  v = NULL;
720  ns = nh = 0;
721 
 /* split at the first space: key stays in buf, v points to the counts */
722  if ((p = strchr(buf, ' ')) != NULL)
723  {
724  v = p + strspn(p, " \t");
725  *p = '\0';
726  }
727 
728  if (v == NULL)
729  {
730  continue;
731  }
732 
733  if (sscanf(v, "%d %d", &ns, &nh) < 2)
734  {
735  ZE_LogMsgWarning(0, "Error : %s %s", k, v);
736 
737  continue;
738  }
739 
740  memset(&tok, 0, sizeof (tok));
 /* plain token in the file but an encoded output was requested:
  * hash the text after the first ':' */
741  if (icli_crypt != HASH_PLAIN && file_crypt == HASH_PLAIN)
742  {
743  char dig[64];
744 
745  p = strchr(buf, ':');
746  if (p != NULL && *p != '\0')
747  p++;
748 
749  switch (icli_crypt)
750  {
751  case HASH_MD5:
752  case HASH_SHA1:
753  str2hash2hex(icli_crypt, dig, p, sizeof (dig));
754  snprintf(tok.token, sizeof (tok.token), "TOKEN:%s", dig);
755  break;
 /* any other code leaves tok.token empty (zeroed by the memset above) */
756  default:
757  break;
758  }
759  } else
760  strlcpy(tok.token, buf, sizeof (tok.token));
761  tok.nbs = ns;
762  tok.nbh = nh;
 /* new token: insert it; known token: add the counts to the stored one */
763  if ((t = zeBTree_Get(&bt, &tok)) == NULL)
764  {
765  if (!zeBTree_Add(&bt, &tok))
766  {
767  ZE_LogMsgError(0, "ERROR inserting new token");
768  continue;
769  }
770  } else
771  {
772  t->nbs += ns;
773  t->nbh += nh;
774  }
775 
776  nt++;
777  continue;
778  }
779  }
780  fclose(fin);
781  }
782  }
783 
784  printf("# Tokens added : %d\n", nt);
785 
786  {
787  int nts, nth;
788  btok_arg_T arg;
789 
790  nts = nth = 0;
791 
 /* print every collected line and add up the counts of the "msgs:" lines */
792  for (plist = list; plist != NULL; plist = plist->next)
793  {
794  char *v;
795  int ns, nh;
796 
797  printf("%s\n", plist->key);
798 
799  if (STRNCASEEQUAL(plist->key, "msgs:", strlen("msgs:")))
800  {
801  v = plist->key;
802  v += strcspn(v, " \t");
803  v += strspn(v, " \t");
804 
805  if (sscanf(v, "%d %d", &ns, &nh) < 2)
806  {
807  ZE_LogMsgWarning(0, "Error : %s", v);
808 
809  continue;
810  }
811  nts += ns;
812  nth += nh;
813  }
814  }
815  printf("CRYPT:%-32s %s\n", "TOKENS", scli_crypt);
 /* NOTE(review): the three MSGS lines below all print the same pair nts/nth */
816  printf("MSGS:%-32s %d %d\n", "__TOTAL__", nts, nth);
817  printf("MSGS:%-32s %d %d\n", "total-tokens", nts, nth);
818  printf("MSGS:%-32s %d %d\n", "total-msgs", nts, nth);
819 
820  memset(&arg, 0, sizeof (arg));
821  arg.nbmin = msgMin;
822 
 /* btok_browse() prints the accepted tokens and fills the counters in arg */
823  nt = zeBTree_Browse(&bt, btok_browse, &arg);
824 
825  printf("Count:%-32s %d %d\n", "msgs", nts, nth);
826  printf("Count:%-32s %d %d\n", "features", arg.nfspam, arg.nfham);
827  printf("Count:%-32s %d %d\n", "Features-shared-total", arg.nfshar,
828  arg.nftot);
829  printf("Count:%-32s %d %d\n", "tokens", arg.nts, arg.nth);
830  }
831 
832  zeBTree_Destroy(&bt);
833  zeLinkedList_Clear(list, NULL);
834 
835  printf("# Tokens browsed : %d\n", nt);
836 
 /* also reached directly when zeBTree_Init() fails */
837 fin:
838  exit(0);
839 }
840 
841 /* ****************************************************************************
842  * *
843  * *
844  **************************************************************************** */
845 static int
846 dtok_browse(void *data, void *arg)
847 {
848  sfilter_token_T *tok = (sfilter_token_T *) data;
849  int *nbmin = (int *) arg;
850 
851  printf("TOKEN:%-40s %d %d\n", tok->token, tok->nbs, tok->nbh);
852 
853  return 1;
854 }
855 
856 
857 static void
858 agregate_tokens(argc, argv, multinomial)
859  int argc;
860  char **argv;
861  bool multinomial;
862 {
863  ZEBT_T bt = JBT_INITIALIZER;
864 
865  LISTR_T *list = NULL, *plist;
866  char *fname = NULL;
867  int i, nl = 0, nt = 0;
868 
869  /*char *crypt = NULL; */
870 
871  for (i = 0; i < argc; i++)
872  {
873  FILE *fin = NULL;
874  int nts, nth;
875 
876  if (!zeBTree_Init(&bt, sizeof (sfilter_token_T), sfilter_token_cmp))
877  goto fin;
878 
879  fname = argv[i];
880 
881  printf("# Aggregating tokens from file : %s\n", fname);
882 
883  if ((fin = fopen(fname, "r")) != NULL)
884  {
885  char buf[1024];
886  bool spam = FALSE;
887  int msgs = 0;
888  char info[1024];
889  char crypt[16];
890  char id[64];
891 
892  memset(crypt, 0, sizeof (crypt));
893  memset(info, 0, sizeof (info));
894  memset(id, 0, sizeof (id));
895  while (fgets(buf, sizeof (buf), fin) != NULL)
896  {
897  char *p;
898 
899  nl++;
900  if ((p = strchr(buf, '\n')) != NULL)
901  *p = '\0';
902 
903  if (STRNCASEEQUAL(buf, "file ", strlen("file ")))
904  {
905  list = zeLinkedList_Add(list, buf, 1, NULL, 0);
906  }
907 
908  if (STRNCASEEQUAL(buf, "crypt ", strlen("crypt ")))
909  {
910  char *p;
911 
912  p = buf;
913  if (strlen(p) > 0)
914  p += strcspn(p, " \t");
915  if (strlen(p) > 0)
916  p += strspn(p, " \t");
917 
918  strlcpy(crypt, p, sizeof (crypt));
919  list = zeLinkedList_Add(list, buf, 1, NULL, 0);
920  }
921 
922  if (STRNCASEEQUAL(buf, "info ", strlen("info ")))
923  {
924  char *p;
925 
926  p = buf;
927  if (strlen(p) > 0)
928  p += strcspn(p, " \t");
929  if (strlen(p) > 0)
930  p += strspn(p, " \t");
931  if (strlen(p) > 0)
932  p += strcspn(p, " \t");
933  if (strlen(p) > 0)
934  p += strspn(p, " \t");
935 
936  if (strlen(p) > 0)
937  {
938  strlcpy(info, p, sizeof (info));
939  list = zeLinkedList_Add(list, p, 1, NULL, 0);
940  }
941  }
942 
943  if (STRNCASEEQUAL(buf, "msgs ", strlen("msgs ")))
944  {
945  int bargc = 0;
946  char *bargv[8];
947  int n = 0;
948 
949  bargc = zeStr2Tokens(buf, 8, bargv, " ");
950  if (bargc == 0)
951  continue;
952 
953  spam = STRCASEEQUAL(bargv[3], "s");
954  errno = 0;
955  n = zeStr2long(bargv[4], NULL, 0);
956  if (errno == 0 && n > 0)
957  msgs = n;
958 
959  msgs = atoi(bargv[4]);
960 
961  list = zeLinkedList_Add(list, buf, 1, NULL, 0);
962  }
963 
964  if (STRNCASEEQUAL(buf, "token ", strlen("token ")))
965  {
966  sfilter_token_T tok, *t;
967  char *k, *v, *p;
968  int ns, nh;
969 
970  int bargc = 0;
971  char *bargv[8];
972  int nb = 1;
973 
974  bargc = zeStr2Tokens(buf, 8, bargv, " ");
975  if (bargc == 0)
976  continue;
977 
978  spam = STRCASEEQUAL(bargv[3], "s");
979  memset(&tok, 0, sizeof (tok));
980 
981  /* XXX encode : PLAIN / MD5 / SHA1 */
982  strlcpy(tok.token, bargv[5], sizeof (tok.token));
983 
984  if (multinomial)
985  nb = atoi(bargv[4]);
986 
987  if (spam)
988  tok.nbs = nb;
989  else
990  tok.nbh = nb;
991  if ((t = zeBTree_Get(&bt, &tok)) == NULL)
992  {
993  if (!zeBTree_Add(&bt, &tok))
994  {
995  ZE_LogMsgError(0, "ERROR inserting new token");
996  continue;
997  }
998  } else
999  {
1000  if (spam)
1001  t->nbs += nb;
1002  else
1003  t->nbh += nb;
1004  }
1005 
1006  nt++;
1007  continue;
1008  }
1009  }
1010  fclose(fin);
1011 
1012  printf("__BEGIN__\n");
1013  nts = nth = 0;
1014 
1015  if (spam)
1016  nts = msgs;
1017  else
1018  nth = msgs;
1019 
1020  {
1021  time_t now;
1022  char buf[64], *p;
1023 
1024  now = time(NULL);
1025  CTIME_R(&now, buf);
1026  if ((p = strchr(buf, '\n')) != NULL)
1027  *p = '\0';
1028 
1029  printf("DATE:%-40s %ld date=(%s)\n", fname, now, buf);
1030 
1031  printf("MSGS:%-40s %7ld %7ld\n", fname, (long int ) nts, (long int ) nth);
1032  printf("CRYPT:%-40s %s\n", fname, crypt);
1033 
1034  printf("INFO:%-40s %s count=(%ld %ld)\n", fname, info, (long int ) nts, (long int ) nth);
1035  }
1036 
1037  nt = zeBTree_Browse(&bt, dtok_browse, NULL);
1038  zeBTree_Clear(&bt);
1039 
1040  printf("__END__\n");
1041  }
1042  zeLinkedList_Clear(list, NULL);
1043  }
1044 
1045  zeBTree_Destroy(&bt);
1046 
1047  printf("# Tokens browsed : %d\n", nt);
1048 
1049 fin:
1050  exit(0);
1051 }
1052 
1053 /* ****************************************************************************
1054  * *
1055  * *
1056  **************************************************************************** */
1057 static void
1058 usage(app)
1059  char *app;
1060 {
1061  char *acpy = NULL, *appname;
1062 
1063  acpy = strdup(app);
1064  appname = basename(acpy);
1065 
1066  printf("Usage : %s [ -h | -c options | -l options]\n"
1067  " -h : help message (you're reading it...)\n"
1068  " -c : check message/mailbox spamicity\n"
1069  " Check options\n"
1070  " -b file : tokens database\n"
1071  " -t N : number of tokens\n"
1072  " -u prob : probability associated to unknown tokens\n"
1073  " -x : show histogram of scores\n"
1074  " -l : learn message/mailbox\n"
1075  " Learn options\n"
1076  " -s : message/mbox is spam\n"
1077  " General options\n"
1078  " -f flag,flag : tokenizer flags\n"
1079  " -p : don't show progress\n"
1080  " -v : verbose\n"
1081  " -M size : max single message size\n"
1082  " -i \n"
1083  " -e plain | md5 | sha1\n"
1084  " -g group .dtok -> .tok\n"
1085  " -a group .tok -> .txt\n"
1086  " -m minimum messages count\n"
1087  "\n"
1088  "\n"
1089  " %s\n"
1090  " " COPYRIGHT "\n"
1091  " Compiled on %s %s\n\n", appname, PACKAGE, __DATE__, __TIME__);
1092 
1093  FREE(acpy);
1094 }
LISTR_T * zeLinkedList_Add(LISTR_T *, char *, int, void *, size_t)
Definition: zeLinkedList.c:34
double uprob
Definition: ze-bcheck.h:69
size_t maxSize
Definition: ze-bcheck.h:57
#define HASH_SHA1
Definition: ze-msg-hash.h:29
void * zeBTree_Get(ZEBT_T *, void *)
Definition: zeBTree.c:281
#define ASSERT(a)
Definition: macros.h:27
bool zeLinkedList_Clear(LISTR_T *, LISTCLEAR_F)
Definition: zeLinkedList.c:195
#define strrchr
Definition: ze-sys.h:219
#define FREE(x)
Definition: macros.h:37
#define ZE_CDBDIR
Definition: defs.h:34
#define JBT_INITIALIZER
Definition: zeBTree.h:85
#define COPYRIGHT
Definition: version.h:31
bool zeBTree_Init(ZEBT_T *, size_t, ZEBT_CMP_F)
Definition: zeBTree.c:96
void set_mime_debug(bool)
Definition: ze-demime.c:69
#define HASH_PLAIN
Definition: ze-msg-hash.h:27
#define HASH_MD5
Definition: ze-msg-hash.h:28
int ze_logLevel
Definition: zeSyslog.c:34
#define STRNULL(x, r)
Definition: macros.h:81
#define STRNCASEEQUAL(a, b, n)
Definition: macros.h:75
bool zeBTree_Add(ZEBT_T *, void *)
Definition: zeBTree.c:309
void zeLog_SetOutput(bool, bool)
Definition: zeSyslog.c:490
#define FALSE
Definition: macros.h:160
#define strlcpy
Definition: zeString.h:32
#define ZE_LogMsgError(level,...)
Definition: zeSyslog.h:113
bool zeStrRegex(char *, char *, long *, long *, bool)
Definition: zeStrings.c:544
bool progress
Definition: ze-bcheck.h:60
#define OPT_CHECK
Definition: ze-bayes-tbx.c:92
bool zeBTree_Destroy(ZEBT_T *)
Definition: zeBTree.c:192
sfilter_vsm_T bcheck
Definition: ze-bcheck.h:65
int mbox_handle(char *fname, mbox_F func, void *arg)
Definition: ze-mbox.c:34
bool bfilter_init(char *dbname)
Definition: ze-bfilter.c:200
int zeStr2Tokens(char *, int, char **, char *)
Definition: zeStrings.c:610
#define OPT_INIT
Definition: ze-bayes-tbx.c:89
bool histogram
Definition: ze-bcheck.h:67
bool sfilter_cli_handle_message(char *fname, int msgNb, void *arg)
Definition: ze-bcheck.c:252
#define strlcat
Definition: zeString.h:28
#define strchr
Definition: ze-sys.h:218
#define CTIME_R(t, s)
Definition: ze-sys.h:597
bool bfilter_close()
Definition: ze-bfilter.c:651
bool str2hash2hex(int code, char *sout, char *sin, size_t szout)
Definition: ze-msg-hash.c:107
bool verbose
Definition: ze-bcheck.h:59
long zeStr2long(char *s, int *error, long dval)
Definition: zeStrConvert.c:35
char * zeStrSet(char *, int, int)
Definition: zeStrings.c:330
bool zeBTree_Clear(ZEBT_T *)
Definition: zeBTree.c:222
char * hash_code2label(int code)
Definition: ze-msg-hash.c:209
#define ZE_MessageInfo(level,...)
Definition: zeSyslog.h:90
int nb
Definition: ze-connopen.c:61
int zeSafeStrnCpy(char *, size_t, char *, size_t)
Definition: zeStrings.c:136
int main(int argc, char **argv)
Definition: ze-bayes-tbx.c:125
#define TRUE
Definition: macros.h:157
#define ZE_MessageWarning(level,...)
Definition: zeSyslog.h:92
LISTR_T * next
Definition: zeLinkedList.h:37
#define SET_STATE(new)
Definition: ze-bayes-tbx.c:97
#define OPTSTR_INIT
Definition: ze-bayes-tbx.c:78
#define ZE_LogMsgWarning(level,...)
Definition: zeSyslog.h:112
void smodel_db_info(char *prefix, smodel_db_browse_F func, void *arg)
Definition: ze-bfilter.c:846
#define OPT_LEARN
Definition: ze-bayes-tbx.c:93
#define OPT_GROUP
Definition: ze-bayes-tbx.c:94
void usage(char *arg)
#define OPT_HELP
Definition: ze-bayes-tbx.c:90
char * zeStr2Upper(char *)
Definition: zeStrings.c:312
#define PACKAGE
Definition: version.h:28
#define STRCASEEQUAL(a, b)
Definition: macros.h:72
#define OPT_INFO
Definition: ze-bayes-tbx.c:91
char * dbname
Definition: ze-bcheck.h:61
bool set_bfilter_db_crypt(int crypt)
Definition: ze-bfilter.c:507
int histo[21]
Definition: ze-bcheck.h:66
int hash_label2code(char *label)
Definition: ze-msg-hash.c:189
char token[128]
Definition: ze-bfilter.h:156
int zeBTree_Browse(ZEBT_T *, ZEBT_BROWSE_F, void *)
Definition: zeBTree.c:262
Definition: zeBTree.h:73