ze-filter  (ze-filter-0.8.0-develop-180218)
ze-bcheck.c
Go to the documentation of this file.
1 /*
2  *
3  * ze-filter - Mail Server Filter for sendmail
4  *
5  * Copyright (c) 2001-2018 - Jose-Marcio Martins da Cruz
6  *
7  * Auteur : Jose Marcio Martins da Cruz
8  * jose.marcio.mc@gmail.org
9  *
10  * Historique :
11  * Creation : Mon Jun 19 17:24:56 CEST 2006
12  *
13  * This program is free software, but with restricted license :
14  *
15  *
16  * This program is distributed in the hope that it will be useful,
17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
19  *
20  * More details about ze-filter license can be found at ze-filter
21  * web site : http://foss.jose-marcio.org
22  */
23 
24 
25 #include <ze-sys.h>
26 #include <ze-filter.h>
27 #include <libml.h>
28 
35 /* ****************************************************************************
36 ** ###### # # ## # # # ## ##### ######
37 ** # # # # # # # # # # # #
38 ** ##### # # # # # # # # # # #####
39 ** # # # ###### # # # ###### # #
40 ** # # # # # # # # # # # #
41 ** ###### ## # # ###### #### # # # ######
42 **************************************************************************** */
43 #define KULLBACK_AVERAGE 4
44 
#if 0
/*
 * Select the averaging method from the BAYES_AVERAGE environment variable.
 * Only the Kullback method exists, so every path yields KULLBACK_AVERAGE.
 * Kept disabled, as in the original.
 */
static int
which_average_method()
{
  char *choice = getenv("BAYES_AVERAGE");

  if (choice != NULL && STRCASEEQUAL(choice, "KULLBACK"))
    return KULLBACK_AVERAGE;

  return KULLBACK_AVERAGE;
}
#endif
59 
60 #define PROBVALUE(a) (fabs(0.5 - (a)))
61 
62 static double
63 probability_average(t, n)
64  sfilter_token_T *t;
65  int n;
66 {
67 #if 0
68  int method = KULLBACK_AVERAGE;
69 #endif
70  bfilter_T *bf = NULL;
71 
72  bf = bfilter_ptr();
73 
74 #if 0
75  method = which_average_method();
76 
77  if (method == KULLBACK_AVERAGE)
78 #endif
79  {
80  double lodd = 0.;
81  int i;
82  int nt = 0;
83 
84  double nms, nmh;
85  double nm;
86  double ks, kh;
87 
88 #if 0
89  double hmt = 0;
90 #endif
91 
92 #if 1
93  nms = bf->nbTokensSpam;
94  nmh = bf->nbTokensHam;
95 #else
96  nms = bf->nbMsgsSpam;
97  nmh = bf->nbMsgsHam;
98 #endif
99 
100  nm = MAX(nms, nmh);
101  ks = nms < nm ? (nm / nms) : 1.;
102  kh = nmh < nm ? (nm / nmh) : 1.;
103 
104  nt = 0;
105  for (i = 0; i < n; i++)
106  {
107  if (t[i].ok)
108  {
109  double ns = 0, nh = 0;
110 #if 0
111  double p = t[i].prob;
112 #endif
113 
114  ns = ks * t[i].nts;
115  nh = kh * t[i].nth;
116 
117 #if 1
118  lodd += log((ns + 0.5) / (nh + 0.5));
119  nt++;
120 #else
121  lodd += t[i].nb * log((ns + 0.5) / (nh + 0.5));
122  nt += t[i].nb;
123 #endif
124 
125 #if 0
126  hmt -= (p * log(p) + (1. - p) * log(1. - p));
127 #endif
128  }
129  }
130 
131  if (nt > 0)
132  lodd /= nt;
133 #if 0
134  hmt = 1 - hmt / (nt * log(2.));
135  ZE_MessageInfo(19, "X HMT = %6.3f", hmt);
136 #endif
137 
138  return 1 / (1 + exp(-lodd));
139  }
140 
141  return 0.;
142 }
143 
144 #define TOK_PERT_CMP(a,b) ((a)->value > (b)->value ? 1 : -1)
145 
146 static int
147 bptokcmp(const void *a, const void *b)
148 {
149  sfilter_token_T *ta = (sfilter_token_T *) a;
150  sfilter_token_T *tb = (sfilter_token_T *) b;
151 
152  ASSERT(ta != NULL);
153  ASSERT(tb != NULL);
154 
155 #if 1
156  return TOK_PERT_CMP(ta, tb);
157 #else
158  {
159  double pa, pb;
160 
161  pa = PROBVALUE(ta->prob);
162  pb = PROBVALUE(tb->prob);
163  if (pa > pb)
164  return 1;
165  if (pa < pb)
166  return -1;
167  return 0;
168  }
169 #endif
170 }
171 
172 static int
173 browse_tokens(void *node, void *arg)
174 {
175  sfilter_token_T *t = node;
176  sfilter_vsm_T *bp = arg;
177 
178  if (!smodel_db_check_token(t->token, t))
179  t->prob = UT_PROB;
180 
181  if (strlen(bp->tok[0].token) != 0)
182  {
183 #if 1
184  if (TOK_PERT_CMP(t, &(bp->tok[0])) > 0)
185 #else
186  if (PROBVALUE(t->prob) > PROBVALUE(bp->tok[0].prob))
187 #endif
188  {
189  bp->tok[0] = *t;
190  qsort(bp->tok, bp->nbt, sizeof (sfilter_token_T), bptokcmp);
191  }
192  } else
193  {
194  bp->tok[0] = *t;
195  qsort(bp->tok, bp->nbt, sizeof (sfilter_token_T), bptokcmp);
196  }
197 
198  bp->nb++;
199 
200  return 1;
201 }
202 
203 /* ****************************************************************************
204 ** ##### #### # # ###### # # # ###### ######
205 ** # # # # # # ## # # # #
206 ** # # # #### ##### # # # # # #####
207 ** # # # # # # # # # # # #
208 ** # # # # # # # ## # # #
209 ** # #### # # ###### # # # ###### ######
210 **************************************************************************** */
211 static int crypt_tok = HASH_PLAIN;
212 
213 static int
214 list_tokens(node, arg)
215  void *node;
216  void *arg;
217 {
218  sfilter_token_T *t = node;
219  sfilter_cli_T *bp = arg;
220  char *s = t->token;
221  char buf[64];
222 
223  switch (crypt_tok)
224  {
225  case HASH_PLAIN:
226  break;
227  case HASH_MD5:
228  case HASH_SHA1:
229  memset(buf, 0, sizeof (buf));
230  (void) str2hash2hex(crypt_tok, buf, t->token, sizeof (buf));
231  s = buf;
232  break;
233  default:
234  break;
235  }
236 
237  printf("TOKEN %s %s %c %6d %s\n", bp->timestr, bp->id,
238  (bp->spam ? 'S' : 'H'), t->nb, s);
239 
240  return 1;
241 }
242 
243 /* ****************************************************************************
244 ** #### ###### ###### # # # # ######
245 ** # # # # # # ## # #
246 ** # # ##### ##### ##### # # # # # #####
247 ** # # # # # # # # # #
248 ** # # # # # # # ## #
249 ** #### # # ###### # # # ######
250 **************************************************************************** */
251 bool
252 sfilter_cli_handle_message(fname, msgNb, arg)
253  char *fname;
254  int msgNb;
255  void *arg;
256 {
257  sfilter_cli_T *data = (sfilter_cli_T *) arg;
258  char timestr[32];
259  time_t now;
260  bool res;
261  char id[32];
262 
263  snprintf(id, sizeof (id), "%s.%06d", "MSG", msgNb);
264 
266 
267  if (data->maxSize > 0 && zeGetFileSize(fname) > data->maxSize)
268  return FALSE;
269 
270  if (data->check)
271 #if 0
272  SFILTER_VSM_INIT(&(data->bcheck), data->nbt, data->uprob);
273 #else
274  {
275  int n;
276 
277  if (data->nbt < MAX_TOK)
278  data->bcheck.nbt = data->nbt;
279  else
280  data->bcheck.nbt = MAX_TOK;
281  for (n = 0; n < MAX_TOK; n++)
282  data->bcheck.tok[n].prob = UT_PROB;
283  }
284 #endif
285 
286  now = time(NULL);
287  snprintf(timestr, sizeof (timestr), "%ld", now);
288  data->timestr = timestr;
289  data->id = id;
290 
291  crypt_tok = get_bfilter_db_crypt();
292 
293  if (data->check)
294  res = bfilter_handle_message(id, fname, browse_tokens, &data->bcheck);
295  else
296  res = bfilter_handle_message(id, fname, list_tokens, data);
297 
298  if (res && data->check)
299  {
300  double prob;
301  int n;
302 
303  if (data->verbose)
304  {
305  for (n = 0; n < data->bcheck.nbt; n++)
306  printf("TOKEN %3d %3d %6.3f %s\n", n, data->bcheck.tok[n].nb,
307  data->bcheck.tok[n].prob, data->bcheck.tok[n].token);
308  }
309  prob = probability_average(data->bcheck.tok, data->bcheck.nbt);
310  if (data->progress)
311  printf("%5d : RESULT = %6.4f\n", msgNb, prob);
312  n = 20 * prob;
313  data->histo[n]++;
314  }
315 
316  return res;
317 }
318 
319 /* ****************************************************************************
320 ** #### # # # # # # ######
321 ** # # ## # # # ## # #
322 ** # # # # # ##### # # # # # #####
323 ** # # # # # # # # # # #
324 ** # # # ## # # # ## #
325 ** #### # # ###### # # # ######
326 **************************************************************************** */
327 double
328 sfilter_check_message(id, fname, bcheck)
329  char *id;
330  char *fname;
331  sfilter_vsm_T *bcheck;
332 {
333  int n;
334  double prob = -1.;
335 
336  for (n = 0; n < MAX_TOK; n++)
337  bcheck->tok[n].prob = 0.5;
338 
339  if (bfilter_handle_message(id, fname, browse_tokens, bcheck))
340  prob = probability_average(bcheck->tok, bcheck->nbt);
341 
342  ZE_MessageInfo(11, "PROB = %6.2f", prob);
343  return prob;
344 }
345 
/*
 * Placeholder: the body is empty, nothing is computed or printed.
 */
void
sfilter_histogram()
{

}
351 
#define SHOW_CURSOR(zero)
Definition: macros.h:249
#define MAX(a, b)
Definition: macros.h:139
double uprob
Definition: ze-bcheck.h:69
double sfilter_check_message(char *id, char *fname, sfilter_vsm_T *bcheck)
Definition: ze-bcheck.c:328
size_t maxSize
Definition: ze-bcheck.h:57
#define HASH_SHA1
Definition: ze-msg-hash.h:29
long nbTokensSpam
Definition: ze-bfilter.h:54
#define ASSERT(a)
Definition: macros.h:27
void sfilter_histogram()
Definition: ze-bcheck.c:347
bfilter_T * bfilter_ptr()
Definition: ze-bfilter.c:60
#define HASH_PLAIN
Definition: ze-msg-hash.h:27
#define HASH_MD5
Definition: ze-msg-hash.h:28
bool smodel_db_check_token(char *key, sfilter_token_T *token)
Definition: ze-bfilter.c:723
#define TOK_PERT_CMP(a, b)
Definition: ze-bcheck.c:144
bool ok
Definition: ze-connopen.c:59
#define FALSE
Definition: macros.h:160
size_t zeGetFileSize(char *)
Definition: zeFileTools.c:132
bool progress
Definition: ze-bcheck.h:60
sfilter_vsm_T bcheck
Definition: ze-bcheck.h:65
char * id
Definition: ze-bcheck.h:62
#define PROBVALUE(a)
Definition: ze-bcheck.c:60
#define KULLBACK_AVERAGE
Definition: ze-bcheck.c:43
#define UT_PROB
Definition: ze-bfilter.h:190
bool sfilter_cli_handle_message(char *fname, int msgNb, void *arg)
Definition: ze-bcheck.c:252
long nbMsgsSpam
Definition: ze-bfilter.h:48
bool str2hash2hex(int code, char *sout, char *sin, size_t szout)
Definition: ze-msg-hash.c:107
bool verbose
Definition: ze-bcheck.h:59
#define ZE_MessageInfo(level,...)
Definition: zeSyslog.h:90
sfilter_token_T tok[MAX_TOK]
Definition: ze-bcheck.h:36
long nbMsgsHam
Definition: ze-bfilter.h:49
#define SFILTER_VSM_INIT(bc, nbt, prob)
Definition: ze-bcheck.h:41
long nbTokensHam
Definition: ze-bfilter.h:55
#define STRCASEEQUAL(a, b)
Definition: macros.h:72
char * timestr
Definition: ze-bcheck.h:64
#define MAX_TOK
Definition: ze-bcheck.h:33
int histo[21]
Definition: ze-bcheck.h:66
char token[128]
Definition: ze-bfilter.h:156
int get_bfilter_db_crypt()
Definition: ze-bfilter.c:600
bool bfilter_handle_message(char *id, char *fname, btsm_browse_F func, void *arg)