/*
 * texpire -- expire old articles
 *
 * Written by Arnt Gulbrandsen and copyright 1995 Troll Tech AS,
 * Postboks 6133 Etterstad, 0602 Oslo, Norway, fax +47 22646949.
 *
 * Modified by Cornelius Krasel and Randolf Skerka.
 * Copyright of the modifications 1997.
 * Modified by Kent Robotti.
 * Copyright of the modifications 1998.
 * Modified by Markus Enzenberger.
 * Copyright of the modifications 1998.
 * Modified by Cornelius Krasel.
 * Copyright of the modifications 1998, 1999.
 * Modified by Kazushi (Jam) Marukawa.
 * Copyright of the modifications 1998, 1999.
 * Modified by Joerg Dietrich.
 * Copyright of the modifications 1999.
 * Modified by Stefan Wiens.
 * Copyright of the modifications 2001.
 *
 * See README for restrictions on the use of this software.
 */

#include "leafnode.h"

/*
 * NOTE(review): every #include below has lost its header name (anything
 * that looked like <...> appears to have been stripped from this copy of
 * the file, including the e-mail addresses in the credits above).  The
 * directives are kept as found; restore the header names from a pristine
 * copy before building.
 */
#ifdef SOCKS
#include
#endif

#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include

/*
 * Field numbers.  Presumably these are the positions of the respective
 * fields in a tab-separated overview (XOVER) line, with the article number
 * as field 0 -- NOTE(review): no intact use of these macros survives in
 * this copy, confirm against the original.
 */
#define SUBJECT 1
#define FROM 2
#define DATE 3
#define MESSAGEID 4
#define REFERENCES 5
#define BYTES 6
#define LINES 7
#define XREF 8

time_t now;             /* set once in main() from time(NULL) */
time_t default_expire;  /* main() stores the configured expire value here; */
                        /* expiregroup() falls back to it */

int debug = 0;          /* main() copies debugmode into this */
int use_atime = 1;      /* look for atime on articles to expire */
                        /* (cleared by option -f) */
int repair_spool = 0;   /* set by option -r */
int group_relinked = FALSE;     /* flags if this group has been relinked */

/* void freeactive(void); */
/* void free_xover(void); */

char gdir[PATH_MAX];    /* filled by getcwd() in dogroup(); used as the */
                        /* directory prefix in log messages */
unsigned long deleted, kept;    /* per-group counters: "deleted" is bumped */
                                /* by delete_article(), "kept" is computed */
                                /* by low_wm() */

/* Thread expiry by Stefan Wiens 2001-01-08 */

/*
 * All Message-IDs in the Message-ID: and References: XOVER field of an
 * article together are considered as a thread. The first encountered
 * article starts an initial thread.
 * For all subsequent articles, we check if any of its IDs is member of an
 * already known thread. Then we put together all subthreads this article
 * is a member of.
 * After all articles have been considered, we have a list of all threads
 * in this group.
 * Each thread is then searched for an article that has been created/read
 * more recently than the expiry limit.
If one exists, the entire thread * is rescued from expiry. (Thanks to Joerg Dietrich for inspiration.) * * "threadlist" is the origin of a linked list of threads. Each element of * this list points to a list of Message-IDs known to be members of this * thread. Each Message-ID element (rnode) itself has a pointer to its * thread list element, so we can easily determine which thread it * belongs to. * For quick lookup by Message-ID, each of those Message-ID elements * (rnode) is also part of a list which contains all IDs with the same * hash value. These hash lists are pointed to by a hash table. */ int use_middir = 0; /* don't use message.id directory for stat() */ struct rnode { struct rnode *nhash; /* next rnode in this hash list */ struct rnode *nthread; /* next rnode in this thread */ struct thread *fthread; /* start of this thread */ unsigned long artno; /* article number, 0 if unknown */ char *mid; /* Message-ID */ }; struct thread { struct thread *next; /* next thread in list */ struct rnode *thread; /* first rnode in this thread */ } *threadlist = NULL; /* starting point of thread list */ #define HASHSIZE 12345 struct rnode *hashtab[HASHSIZE]; /* each entry points to a list of */ /* rnodes with same hash value */ unsigned long hashval(const struct rnode *node); void hash_thread(struct thread *thread); struct rnode *newnode(char *mid, unsigned long artno); struct rnode *findnode(const struct rnode *node); void merge_threads(struct thread *a, struct thread *b); struct thread *xoverthread(char *xoverline, unsigned long artno); void build_threadlist(unsigned long acount); void free_threadlist(void); unsigned long count_threads (void); void remove_newer(void); void delete_article(struct rnode *r); void delete_threads(void); void relink(void); unsigned long low_wm(unsigned long high); /* very simple hash function ;-) */ unsigned long hashval(const struct rnode *node) { unsigned long val; char *p; int i; val = 0; p = node->mid; for (i=0; i<20 && p && *p ; ++i) { 
val += val ^ (i + *p++); } return (val % HASHSIZE); } /* put all references in this thread into hash table */ void hash_thread(struct thread *thread) { struct rnode *r; unsigned long h; r = thread->thread; while (r) { h = hashval(r); /* no need to check for duplicates ;-) */ r->nhash = hashtab[h]; hashtab[h] = r; /* push on list */ r = r->nthread; } } /* create a new reference node */ struct rnode *newnode(char *mid, unsigned long artno) { struct rnode *new; new = (struct rnode *)critmalloc(sizeof(struct rnode), "Allocating new refecence"); new->nhash = new->nthread = NULL; new->fthread = NULL; new->artno = artno; new->mid = mid; return new; } /* find node with same message-ID, return node or NULL if not found */ struct rnode *findnode(const struct rnode *node) { struct rnode *f; if (!*(node->mid)) { return NULL; } f = hashtab[hashval(node)]; while (f) { if (strcmp(f->mid, node->mid) == 0) { return f; } f = f->nhash; /* try next in list */ } return NULL; } /* merge thread b into a */ void merge_threads(struct thread *a, struct thread *b) { struct rnode *r; if (!(r = b->thread)) { /* nothing to do */ return; } while (r->fthread = a , /* update start of thread pointer */ r->nthread) { /* for all references in thread */ r = r->nthread; } /* r now points to the last reference in b */ r->nthread = a->thread; /* now link b in front of a */ a->thread = b->thread; b->thread = NULL; /* XXX the now empty thread b */ /* could immediately be removed from threadlist */ } /* * return a thread built from an XOVER line, * containing its Message-ID and all references */ struct thread *xoverthread(char *xoverline, unsigned long artno) { int i; char *p, *q, *r; struct thread *new; struct rnode *node; p = xoverline; if (!p || !*p || !artno) { /* illegal */ return NULL; } node = newnode("", artno); /* make this the start of a new thread */ new = (struct thread *)critmalloc(sizeof(struct thread), "Allocating new thread"); new->next = NULL; new->thread = node; node->fthread = new; 
for(i=0 ;i'))) { return new; } /* p now is a valid Message-ID */ *++q = '\0'; /* zero terminate Message-ID */ node->mid = p; p = q = r; /* start of References: field */ if (!(r = strchr(r, '\t'))) { /* end of references */ return new; } *r = '\0'; while (*q && q'))) { *++q = '\0'; /* zero terminate this reference */ if ((p = strrchr(p, '<'))) { node = newnode(p, 0); /* unknown artno */ node->nthread = new->thread; node->fthread = new; /* put into this thread */ new->thread = node; } p = ++q; } return new; } /* * generate threadlist from xoverinfo */ void build_threadlist(unsigned long acount) { unsigned long i; struct thread *x, *firstfound; struct rnode *f, *r, **u; for (i=0; ithread); /* needed for punching out elements */ r = *u; firstfound = NULL; while (r) { if ((f = findnode(r))) { /* is this MID already known? */ if (r->artno) { if (f->artno == 0) { /* possibly update artno */ f->artno = r->artno; } else if (f->artno != r->artno) { /* duplicate */ delete_article(r); } } if (firstfound) { /* link subthreads */ if (f->fthread != firstfound) { /* not merged yet? 
*/ merge_threads(firstfound, f->fthread); } } else { /* this thread everything will go into */ firstfound = f->fthread; } *u = r->nthread; /* remove this element from thread */ free(r); } else { u = &(r->nthread); } r = *u; } hash_thread(x); if (firstfound) { /* this article is part of another thread */ merge_threads(firstfound, x); free(x); } else { /* entirely new thread */ x->next = threadlist; threadlist = x; } } } } /* free all rnodes and threads, empty hash table */ void free_threadlist(void) { unsigned long i; struct rnode *r; struct thread *t; for (i=0; inhash; free(r); } } while ((t = threadlist)) { threadlist = t->next; free(t); } } /* return number of threads in threadlist */ unsigned long count_threads (void) { unsigned long n; struct thread *t; n = 0; t = threadlist; while (t) { if (t->thread) { ++n; } t = t->next; } return n; } /* remove all threads from list that have * at least one new member */ void remove_newer(void) { struct thread *t; struct rnode *r; struct stat st; int sf; /* flags 0 on stat() failure */ const char *m; char name[PATH_MAX]; t = threadlist; while (t) { if (t->thread) { r = t->thread; while (r) { if (r->artno) { sf = 0; /* once we have stat info, we check it: */ if (use_middir && r->mid && (m=lookup(r->mid))){ /* try to stat() via message.id/ dir */ sf = !(stat(m,&st)); if (!sf || st.st_nlink < 2) { relink(); /* something's wrong here */ } } if (!sf) { /* get stat via article number */ snprintf(name, PATH_MAX-1, "%lu", r->artno); sf = !(stat(name, &st)); if ( sf && st.st_nlink < 2) { relink(); /* something's wrong here */ sf = !(stat(name, &st)); /* may have been unlinked */ } } if (sf && S_ISREG(st.st_mode) && ((use_atime ? st.st_atime : st.st_mtime) > expire)) { goto rescue; /* this thread will be rescued, */ } /* no need to look further */ } r = r->nthread; } rescue: ; if (r) { /* a newer article was found, */ t->thread = NULL; /* disconnect from threadlist. 
*/ } /* we later free() from hashtab[] */ } t = t->next; } } /* delete article file which belongs to this node */ void delete_article(struct rnode *r) { char name[PATH_MAX]; if (!r || !r->artno) { return; } snprintf(name, PATH_MAX-1, "%lu", r->artno); if (!unlink(name)) { if (debugmode) { syslog(LOG_DEBUG, "deleted article %s/%lu", gdir, r->artno); } r->artno = 0; deleted++; } else if (errno != ENOENT && errno != EEXIST) { /* if file was deleted already or it was not a file */ /* but a directory, skip error message */ syslog(LOG_ERR, "unlink %s/%lu: %m", gdir, r->artno); } } /* delete all article files in all remaining threads */ void delete_threads(void) { struct thread *t; struct rnode *r; t = threadlist; while (t) { r = t->thread; while (r) { if (r->artno && r->mid) { delete_article(r); } r = r->nthread; } t = t->next; } } /* create missing links in the message.id subdirectory */ void relink(void) { unsigned long i; struct rnode *r; const char *m; char name[PATH_MAX]; struct stat st; if (group_relinked) { return; /* once per group is enough */ } for (i=0; iartno && r->mid) { snprintf(name, PATH_MAX-1, "%lu", r->artno); if (!stat(name, &st) && S_ISREG(st.st_mode) && (st.st_nlink < 2) && (m = lookup(r->mid))) { /* repair fs damage */ if (link(name, m)) { if (errno == EEXIST) { /* exists, but points to another file */ delete_article(r); } else { syslog(LOG_ERR, "relink of %s failed: %m (%s)", r->mid, m); } } else { syslog(LOG_INFO, "relinked message %s", m); } } } r = r->nhash; } } group_relinked = TRUE; /* don't try twice for the same group */ } /* * find lowest article number, lower than high, * also count total number of articles */ unsigned long low_wm(unsigned long high) { unsigned long low, i; struct rnode *r; low = high; kept = 0; for (i=0; iartno) { /* don't count nonexisting articles */ ++kept; if (r->artno < low) { low = r->artno; } } r = r->nhash; } } return low; } /* ... */ /* 05/27/97 - T. 
 * Sweeney - Find a group in the expireinfo linked list and return
 * its expire time. Otherwise, return zero.
 *
 * The list starting at expire_base is searched in order; the first entry
 * for which ngmatch(entry->group, group) returns 0 wins.
 */
static time_t lookup_expire(char* group)
{
    struct expire_entry *a;

    a = expire_base;
    while ( a ) {
        if (ngmatch(a->group, group) == 0)
            return a->xtime;
        a = a->next;
    }
    return 0;
}

/*
 * Free every entry of the list starting at expire_base.
 * NOTE(review): expire_base itself is not reset here and keeps pointing
 * at freed memory; main() calls this only once, shortly before returning.
 */
void free_expire( void )
{
    struct expire_entry *a, *b;

    b = expire_base;
    while ((a = b) != NULL) {
        b = a->next;
        free(a);
    }
}

/*
 * return 1 if xover is a legal overview line, 0 else
 *
 * xover is checked field by field (fields are separated by tabs); artno
 * is used for the debug log messages only.  When debugmode is set, the
 * reason for rejecting a line is logged.  Side effect: a tab following
 * the Lines: field is overwritten with NUL, cutting off extra fields.
 */
static int legalxoverline ( char * xover, unsigned long artno )
{
    char * p;
    char * q;

    if ( !xover )
        return 0;

    /* anything that isn't tab, printable ascii, or latin-* ? then killit */
    p = xover;
    while ( *p ) {
        int c = (unsigned char)*p++;
        if ( ( c != '\t' && c < ' ' ) || ( c > 126 && c < 160 ) ) {
            if ( debugmode )
                syslog( LOG_DEBUG,
                        "%lu xover error: non-printable chars.", artno );
            return 0;
        }
    }

    /* first field (up to the first tab): article number */
    p = xover;
    q = strchr( p, '\t' );
    if ( !q ) {
        if ( debugmode )
            syslog( LOG_DEBUG,
                    "%lu xover error: no Subject: header.", artno );
        return 0;
    }

    /* article number */
    while( p != q ) {
        if ( !isdigit((unsigned char)*p) ) {
            if ( debugmode )
                syslog( LOG_DEBUG, "%lu xover error: article "
                        "number must consists of digits.", artno );
            return 0;
        }
        p++;
    }

    p = q+1;
    q = strchr( p, '\t' );
    if ( !q ) {
        if ( debugmode )
            syslog( LOG_DEBUG, "%lu xover error: no From: header.", artno );
        return 0;
    }

    /* subject: no limitations */

    p = q+1;
    q = strchr( p, '\t' );
    if ( !q ) {
        if ( debugmode )
            syslog( LOG_DEBUG, "%lu xover error: no Date: header.", artno );
        return 0;
    }

    /* from: no limitations */

    p = q+1;
    q = strchr( p, '\t' );
    if ( !q ) {
        if ( debugmode )
            syslog( LOG_DEBUG,
                    "%lu xover error: no Message-ID: header.", artno );
        return 0;
    }

    /* date: no limitations */

    p = q+1;
    q = strchr( p, '\t' );
    if ( !q ) {
        if ( debugmode )
            syslog( LOG_DEBUG,
                    "%lu xover error: no References: or Bytes: header.",
                    artno );
        return 0;
    }

    /* message-id: <*@*> */
    /* p is at the start of the Message-ID field, q at the tab ending it */
    if ( *p != '<' ) {
        if ( debugmode )
            syslog( LOG_DEBUG,
                    "%lu xover error: Message-ID does not start with <.",
                    artno );
        return 0;
    }
    while ( p != q && *p != '@' && *p != '>' && *p != ' ' )
        p++;
    if ( *p != '@' ) {
        if ( debugmode )
            syslog( LOG_DEBUG,
                    "%lu xover error: Message-ID does not contain @.",
                    artno );
        return 0;
    }
    while ( p != q && *p != '>' && *p != ' ' )
        p++;
    /* the closing '>' must be the last character of the field */
    if ( (*p != '>') || (++p != q) ) {
        if ( debugmode )
            syslog( LOG_DEBUG,
                    "%lu xover error: Message-ID does not end with >.",
                    artno );
        return 0;
    }

    p = q+1;
    q = strchr( p, '\t' );
    if ( !q ) {
        if ( debugmode )
            syslog( LOG_DEBUG,
                    "%lu xover error: no Bytes: header.", artno );
        return 0;
    }

    /* references: a series of <*@*> separated by space */
    /* (an empty References: field is accepted) */
    while ( p != q ) {
        if ( *p != '<' ) {
            if ( debugmode )
                syslog( LOG_DEBUG, "%lu xover error: "
                        "Reference does not start with <.", artno );
            return 0;
        }
        while ( p != q && *p != '@' && *p != '>' && *p != ' ' )
            p++;
        if ( *p != '@' ) {
            if ( debugmode )
                syslog( LOG_DEBUG,
                        "%lu xover error: Reference does not contain @.",
                        artno );
            return 0;
        }
        while ( p != q && *p != '>' && *p != ' ' )
            p++;
        if ( *p++ != '>' ) {
            if ( debugmode )
                syslog( LOG_DEBUG,
                        "%lu xover error: Reference does not end with >.",
                        artno );
            return 0;
        }
        /* skip the blanks between two references */
        while ( p != q && *p == ' ' )
            p++;
    }

    p = q+1;
    q = strchr( p, '\t' );
    if ( !q ) {
        if ( debugmode )
            syslog( LOG_DEBUG,
                    "%lu xover error: no Lines: header.", artno );
        return 0;
    }

    /* byte count */
    while( p != q ) {
        if ( !isdigit((unsigned char)*p) ) {
            if ( debugmode )
                syslog( LOG_DEBUG, "%lu xover error: illegal digit "
                        "in Bytes: header.", artno );
            return 0;
        }
        p++;
    }

    p = q+1;
    q = strchr( p, '\t' );
    if ( q )
        *q = '\0'; /* kill any extra fields */

    /* line count */
    /* runs to the end of the string, i.e. to the NUL written above if */
    /* there were extra fields */
    while( p && *p && p != q ) {
        if ( !isdigit((unsigned char)*p) ) {
            if ( debugmode )
                syslog( LOG_DEBUG, "%lu xover error: illegal digit "
                        "in Lines: header.", artno );
            return 0;
        }
        p++;
    }

    return 1;
}

/*
 * dogroup: expire group
 *
 * Changes into the group's directory, reads its overview data, computes
 * the low and high water marks from the entries flagged as existing and
 * updates g->first / g->last.  If nothing was kept, .overview is removed
 * and the directory (and its parents, as long as rmdir() succeeds) is
 * deleted when isinteresting(g->name) returns 0.
 *
 * NOTE(review): this function is damaged in this copy of the file.
 * Everything between "for (i=0; i" and "first = first;" is missing (it
 * looks like a "<...>" span was stripped), which leaves the for statement
 * and the "if (debugmode) {" block unterminated.  The lost text
 * presumably held the overview syntax check and the calls into the thread
 * expiry code, and ended in "g->" -- restore it from a pristine copy; the
 * tokens are reproduced here exactly as found.
 */
static void dogroup(struct newsgroup* g)
{
    unsigned long first, last, acount, i;

    acount = 0;
    deleted = kept = 0;

    /* eliminate empty groups */
    if (!chdirgroup(g->name, FALSE)) {
        return;
    }
    /* NOTE(review): return value of getcwd() is not checked */
    getcwd(gdir, PATH_MAX);

    /* read overview information */
    if (!getxover(g->name)) {
        return;
    }

    /* find low-water and high-water marks */
    /* (the xoverinfo array is scanned up to the first entry with artno 0) */
    first = ULONG_MAX;
    last = 0;
    acount = 0;
    for (i=0 ; xoverinfo[i].artno ; ++i) {
        if (xoverinfo[i].exists) {
            if (first > xoverinfo[i].artno) {
                first = xoverinfo[i].artno;
            }
            if (last < xoverinfo[i].artno) {
                last = xoverinfo[i].artno;
            }
            ++acount;
        }
    }

    if (verbose > 1) {
        printf("%s: low water mark %lu, high water mark %lu\n",
               g->name, first, last);
    }
    /* NOTE(review): "expire" is assigned from a time_t in expiregroup() */
    /* but printed with %lu here -- confirm its declared type. */
    if (debugmode) {
        syslog( LOG_DEBUG,
                "%s: expire %lu, low water mark %lu, high water mark %lu",
                g->name, expire, first, last);
    }
    if (expire <= 0) {
        return;
    }

    /* check the syntax of the .overview info */
    /* NOTE(review): source text is missing inside the next statement, */
    /* see the note in front of this function. */
    if (debugmode) {
        for (i=0; ifirst = first;

    if (last > g->last) { /* try to correct insane newsgroup info */
        g->last = last;
    }

    if (deleted || kept) {
        if (verbose)
            printf("%s: %lu articles deleted, %lu kept\n",
                   g->name, deleted, kept);
        syslog(LOG_INFO, "%s: %lu articles deleted, %lu kept",
               g->name, deleted, kept);
    }

    if (!kept) {
        if (unlink(".overview") < 0)
            syslog(LOG_ERR, "unlink %s/.overview: %m", gdir);
        if (!chdir("..") && (isinteresting(g->name) == 0)) {
            /* delete directory and empty parent directories */
            /* after each successful rmdir(), gdir is refreshed with the */
            /* current (parent) directory before moving one level up */
            while (rmdir(gdir) == 0) {
                getcwd(gdir, PATH_MAX);
                chdir("..");
            }
        }
    }

    /* Once we're done and there's something left we have to update the
     * .overview file. Otherwise unsubscribed groups will never be
     * deleted.
     */
    getxover(g->name);
}

/*
 * Run dogroup() on every entry of the array g, up to the first entry
 * that has no name.  Before each call the global "expire" is set to the
 * group's own expire time from lookup_expire(), or to default_expire if
 * that returns 0.
 */
static void expiregroup(struct newsgroup* g)
{
    struct newsgroup * ng;

    ng = g;
    while ( ng && ng->name ) {
        if (!(expire = lookup_expire(ng->name)))
            expire = default_expire;
        dogroup( ng );
        ng++;
    }
}

/*
 * Clean up the message.id tree: in each of the directories
 * <spooldir>/message.id/000 ... 999 every entry with a link count below 2
 * is unlinked; remaining regular files are counted as kept.  A missing
 * directory is recreated.  The totals are printed and logged if non-zero.
 *
 * The local counters "deleted" and "kept" hide the global variables of
 * the same name.  The path is built in "s", which is not declared in this
 * file (presumably a scratch buffer from leafnode.h -- confirm).
 */
static void expiremsgid(void)
{
    int n;
    DIR * d;
    struct dirent * de;
    struct stat st;
    int deleted, kept;

    deleted = kept = 0;

    for ( n=0; n<1000; n++ ) {
        snprintf( s, PATH_MAX, "%s/message.id/%03d", spooldir, n );
        if ( chdir( s ) ) {
            if ( errno == ENOENT )
                mkdir( s, 0755 ); /* file system damage again */
            if ( chdir( s ) ) {
                syslog( LOG_ERR, "chdir %s: %m", s );
                continue;
            }
        }

        d = opendir( "." );
        if ( !d )
            continue;
        while ((de = readdir(d)) != 0) {
            if (stat(de->d_name, &st) == 0) {
                /* link count < 2: no newsgroup directory refers to this */
                /* file any more */
                if (st.st_nlink < 2 && !unlink(de->d_name))
                    deleted++;
                else if (S_ISREG(st.st_mode))
                    kept++;
            }
        }
        closedir( d );
    }

    if ( kept || deleted ) {
        printf("total: %d articles deleted, %d kept\n", deleted, kept);
        syslog( LOG_INFO, "%d articles deleted, %d kept", deleted, kept );
    }
}

/*
 * Print the usage message on stderr.
 * NOTE(review): the synopsis line shows "[-Dfv]" only, although -r and -M
 * are described below it and accepted by getopt() in main().
 */
static void usage( void )
{
    fprintf( stderr,
             "Usage:\n"
             "texpire -V\n"
             " print version on stderr and exit\n"
             "texpire [-Dfv] [-F configfile]\n"
             " -D: switch on debugmode\n"
             " -r: relink articles with message.id tree\n"
             " -M: get article age from message.id tree (EXPERIMENTAL)\n"
             " -f: force expire irrespective of access time\n"
             " -v: more verbose (may be repeated)\n"
             " -F: use \"configfile\" instead of %s/config\n"
             "See also the leafnode homepage at http://www.leafnode.org/\n",
             libdir );
}

/*
 * Program entry: parse the options, read the configuration and the
 * active file, expire all groups, write the active file back and finally
 * clean up the message.id tree.
 *
 * Exits with EXIT_FAILURE on initvars() failure, on an unknown option or
 * when lockfile_exists() returns non-zero; exits with 2 if the
 * configuration or the active file cannot be read or no expire time is
 * set.  Returns 0 otherwise.
 */
int main(int argc, char** argv)
{
    int option, reply;
    char * conffile;

    version = "2.0b_sw1";

    /* default configuration file: <libdir>/config */
    conffile = critmalloc( strlen(libdir) + 10,
                           "Allocating space for configuration file name" );
    snprintf( conffile, strlen(libdir)+9, "%s/config", libdir );

    if ( !initvars(argv[0]) )
        exit(EXIT_FAILURE);

#ifdef HAVE_OLD_SYSLOG
    openlog( "texpire", LOG_PID );
#else
    openlog( "texpire", LOG_PID|LOG_CONS, LOG_NEWS );
#endif

    while ( (option=getopt( argc, argv, "F:VDvfrM" )) != -1 ) {
        /* parseopt() gets the first go at every option; only what it */
        /* does not accept is handled here */
        if ( parseopt( "texpire", option, optarg, conffile ) ) {
            ;
        } else if ( option == 'f' ) {
            use_atime = 0;
        } else if (option == 'r') {
            repair_spool = 1;
        } else if (option == 'M') {
            use_middir = 1;
        } else {
            usage();
            exit(EXIT_FAILURE);
        }
    }
    debug = debugmode;

    /* expire is checked for 0 further down, after readconfig() */
    expire = 0;
    expire_base = NULL;
    if ( ( reply = readconfig( conffile ) ) != 0 ) {
        fprintf( stderr, "Reading configuration from %s failed (%s).\n",
                 conffile, strerror(reply) );
        exit( 2 );
    }

    if ( lockfile_exists( FALSE, FALSE ) )
        exit(EXIT_FAILURE);

    readactive();
    if ( !active ) {
        fprintf( stderr, "Reading active file failed, exiting "
                 "(see syslog for more information).\n"
                 "Has fetchnews been run?\n" );
        exit( 2 );
    }

    if ( verbose ) {
        printf( "texpire %s: ", version );
        if ( use_atime )
            printf( "check mtime and atime\n" );
        else
            printf( "check mtime only\n" );
    }

    if ( debugmode ) {
        syslog( LOG_DEBUG,
                "texpire %s: use_atime is %d; repair_spool is %d",
                version, use_atime, repair_spool );
    }

    if ( expire == 0 ) {
        fprintf( stderr, "%s: no expire time\n", argv[0] );
        exit( 2 );
    }

    now = time( NULL );
    default_expire = expire;

    expiregroup(active);
    writeactive();
    /* release_lockfile(); */
    /* freeactive(); */         /* throw away active data */
    /* free_xover(); */         /* throw away overview data */
    free_expire();
    expiremsgid();
    return 0;
}