#/usr/local/bin/perl 'di'; 'ig00'; # # $Header: /tmp_mnt/home/netlabs1/lwall/pl/RCS/clip,v 1.1 92/07/13 12:37:09 lwall Exp Locker: lwall $ # # $Log: clip,v $ # Revision 1.1 92/07/13 12:37:09 lwall # Initial revision # $HOME = $ENV{HOME} || $ENV{LOGDIR} || (getpwuid($<))[7] || die "No home directory!!!\n"; # Configurable parameters, may be overridden in .cliprc $SPOOLDIR = "/usr/spool/news"; # Where news articles are stored. $NEWSLIB = "/usr/lib/news"; # Where Cnews keeps history file. $HOMETMP = "$HOME/tmp"; # Where clip should put output. $HOMEBIN = "$HOME/bin"; # Where clip should install N script. $MAXLOAD = 3; # What load average to suspend at. $NICE = 16; # What priority to run at. $DEBUG = 0; # Whether to be noisy. $CLIPRC = "$HOME/.cliprc"; require $CLIPRC; # Everything from here on should be machine independent. open(N0, "$HOMETMP/n0"); $oldpid = + 0; close N0; if ($oldpid) { die "Already a clip process running ($oldpid)\n" if kill 0, $oldpid; } die "You must call both &NGSKIP and &SCANNER in $CLIPRC\n" unless defined &ngskip && defined &scanner; sub fixmsg { local($_, $file, $line) = @_; $line -= ($Preamble =~ y/\n//); s/ file \(eval\) at line (\d+)/" $file at line " . ($1 + $line)/eg; s/ at \(eval\) line (\d+)/" in $file at line " . ($1 + $line)/eg; # $* = 1;s/^/$0: /g; # too noisy to have this die; } sub NGSKIP { local($userstuff) = @_; if ($userstuff =~ tr/\n// >= 3 && $userstuff !~ /study/) { $study = "\t\tstudy;\n"; } else { $study = ""; } $eval = (($Preamble = <<'END1' . $study) . $userstuff . <<'END2'); sub ngskip { local($_) = $nglist; eval { &skip if /^cancelled$/; END1 }; if ($@) { $@ = "" if $@ eq "You should never see this\n"; die $@ if $@; } } END2 print STDERR $eval if $DEBUG & 1; eval $eval; &fixmsg($@, (caller)[1,2]) if $@; 1; } sub SCANNER { local($userstuff) = @_; $eval = (($Preamble=<<'END1') . $userstuff . <<'END2'); sub scanner { while () { $totalhits = 0; do { study; $hits = 0; END1 $totalhits += $hits; } while $hits; &printhit if $totalhits; } } END2 print STDERR $eval if $DEBUG & 1; eval $eval; &fixmsg($@, (caller)[1,2]) if $@; 1; } require "timelocal.pl"; fork && exit; # avoid nohup behavior $pid = $$; $pmeter = fork; defined $pmeter || die "can't fork: $!"; if ($pmeter == 0) { &pmeter($MAXLOAD, $pid, $DEBUG & 2); die "Not reached"; } if ($HOMEBIN) { system "echo kill -HUP $pid >$HOMEBIN/N"; system "(echo ps $pid; echo ps $pmeter) >$HOMEBIN/P"; chmod 0755, "$HOMEBIN/N"; chmod 0755, "$HOMEBIN/P"; } setpriority(0, 0, $NICE); # set very slow priority $date = shift; chop($date = `cat $HOME/.lastclip`) if !$date && -f "$HOME/.lastclip"; $date = &lidate($date); chdir $SPOOLDIR || die "Can't cd: $!\n"; select(STDERR); $| = 1; select(STDOUT); $| = 1; &CATCH; $SIG{HUP} = CATCH; # send SIGHUP to switch to new history file $SIG{USR1} = RESTART; # send SIGUSR1 to re-execute clip $SIG{ALRM} = IGNORE; # so we can send ourselves an alarm safely $r = "\r" if -t STDOUT; ($dev,$ino,$mode,$nlink,$uid) = stat STDOUT; $origuid = $uid; for (;;) { LOGLINE: while () { $pos = tell(LOG); chop; ($messid,$date,$nglist) = split(/\t/); $0 = "clip at $messid" if $DEBUG & 1; $wanted = 1; &ngskip; next LOGLINE unless $wanted; $date = &gidate($date); ($ng,$art) = split(m![ /]!,$nglist); next unless $art; $ng =~ y!.!/!; open(ART,"$ng/$art") || next; next if -s ART > 100_000; $count = 0; ++$slept; # to force quick update after big batch $/ = ''; $header = ; $_ = ''; &scanner; close ART; $sleep = 5; } continue { $/ = "\n"; } print STDERR "clip: caught up$r\n" unless $tailing++; sleep $sleep; $slept += $sleep; if ($slept > 300) { $slept = 0; if ($date != $lastdate) { ($dev,$ino,$mode,$nlink,$uid,$gid) = stat STDOUT; exit unless $uid == $origuid; open(LASTDATE,">$HOME/.lastclip"); print LASTDATE &cdate($date),"\n"; close LASTDATE; $lastdate = $date; } ($dev,$ino) = stat("$NEWSLIB/history"); if ($dev != $logdev || $ino != $logino) { print STDERR "************* NEW HISTORY FILE *************$r\n"; open(LOG,"$NEWSLIB/history") || die "Can't open log: $!\n"; ($logdev, $logino) = stat LOG; &seekdate($date); $tailing = 0; $slept = 300; # force immediate update of lastclip. } } $sleep++ if $sleep < 120; seek(LOG,$pos,0); } sub hit { ($before,$match,$after) = ($`,$&,$'); $match =~ s/(.)/_\b$1/g; $_ = "$before$match$after"; study; $hits++; unless ($count++) { $header =~ /From: (.*)/; ($from = $1) =~ s/.*\((.*)\).*/$1/; print("$r\n\@$ng/$art \tFrom: $from$r\n") || exit; $header =~ /Subject: (.*)/ && print "Subject: $1$r\n"; $0 = "clip " . &cdate($date); } } sub skip { $wanted = 0; die "You should never see this\n"; } sub printhit { if (length() > 500) { $wanted = ''; for ($line = 0; /.*\n/g; $line++) { if ($& =~ /_\010/) { for ($w = $line - 3; $w <= $line + 3; $w++) { vec($wanted,$w,1) = 1; } } } $unlines = 0; for ($line = 0; /.*\n/g; $line++) { if (vec($wanted,$line,1)) { print($&,$r) || exit; $unlines = 0; } else { print("...\n$r") || exit unless $unlines++; } } } else { s/\n/\n$r/g if $r; print() || exit; } } sub seekdate { local($start) = shift; if ($start == 9_999_999_999) { seek(LOG,0,2); $pos = tell(LOG); print STDERR "$pid starting at eof...$r\n"; return; } ($st_dev,$st_ino,$st_mode,$st_nlink,$st_uid,$st_gid,$st_rdev,$st_size, $st_atime,$st_mtime,$st_ctime,$st_blksize,$st_blocks) = stat(LOG); for ($offset = $st_size - 100_000; $offset > 0; $offset -= 100_000) { if (seek(LOG,$offset,0)) { $_ = ; # probably starts in middle of a line $_ = ; ($messid,$date,$nglist) = split(/\t/); $date = &gidate($date); last if $date < $start; } else { $offset = -1; } } seek(LOG,0,0) if $offset < 0; while () { ($messid,$date,$nglist) = split(/\t/); $date = &gidate($date); last if $date >= $start; } $pos = tell(LOG); $pct = int($pos * 100 / $st_size); print STDERR "$pid starting at $pct% for $start...$r\n"; $0 = "clip start at $messid" if $DEBUG & 1; } sub CATCH { &openout; open(LOG,"$NEWSLIB/history") || die "Can't open log: $!\n"; ($logdev, $logino) = stat LOG; $date = 9_999_999_999 unless $date; &seekdate($date); $tailing = 0; $slept = 300; # force immediate update of lastclip. $SIG{HUP} = CATCH; kill $$, ALRM; } sub RESTART { kill 9, $pmeter if $pmeter; exec "clip"; die "Couldn't exec clip: $!\n"; } sub cdate { ($sec,$min,$hr,$mday,$mon,$year) = localtime($date); sprintf("%02d/%02d/%02d %02d:%02d",$mon+1,$mday,$year,$hr,$min); } sub lidate { $_[0] =~ m#(\d+)/(\d+)/(\d+) (\d+):(\d+)# ? &timelocal(0, $5, $4, $2, $1-1, $3) : $_[0]; } sub gidate { $_[0] =~ m#(\d+)/(\d+)/(\d+) (\d+):(\d+)# ? &timegm(0, $5, $4, $2, $1-1, $3) : $_[0]; } sub openout { rename("$HOMETMP/n8", "$HOMETMP/n9"); rename("$HOMETMP/n7", "$HOMETMP/n8"); rename("$HOMETMP/n6", "$HOMETMP/n7"); rename("$HOMETMP/n5", "$HOMETMP/n6"); rename("$HOMETMP/n4", "$HOMETMP/n5"); rename("$HOMETMP/n3", "$HOMETMP/n4"); rename("$HOMETMP/n2", "$HOMETMP/n3"); rename("$HOMETMP/n1", "$HOMETMP/n2"); rename("$HOMETMP/n0", "$HOMETMP/n1"); open(STDOUT, ">$HOMETMP/n0"); open(STDERR, ">&STDOUT"); } sub pmeter { # ($loadavg, $pid, $debug) = @ARGV; local($loadavg, $pid, $debug) = @_; $running = 1; $0 = "pmeter @_"; while (kill 0, $pid) { `uptime` =~ /load average:\s+([\d.]+)/ || die "Can't run uptime: $!\n";; if ($1 > $loadavg) { kill 'STOP', $pid; if ($running) { print STDERR "stopping at $1\r\n" if $debug; $0 = "pmeter (stopped $pid at $loadavg)"; $running = 0; } } else { kill 'CONT', $pid; if (!$running) { print STDERR "starting at $1\r\n" if $debug; $0 = "pmeter (started $pid at $loadavg)"; $running = 1; } } sleep 120; } } ############################################################### # These next few lines are legal in both Perl and nroff. .00; # finish .ig 'di \" finish diversion--previous line must be blank .nr nl 0-1 \" fake up transition to first page again .nr % 0 \" start at page 1 '; __END__ ##### From here on it's a standard manual page ##### .TH CLIP 1 "July 14, 1992" .de M \" man page reference \\fI\\$1\\fR\\|(\\$2\)\\$3 .. .AT 3 .SH NAME clip \- personal news clipping service .br pmeter \- start and stop a process according to load average .SH SYNOPSIS .B clip [ .I date ] .SH DESCRIPTION .I Newsclip scans incoming news by following the C news history file as it grows and examining each article listed there for patterns of interest. .PP If a date argument is supplied on the command line (in the form .IR "``mm/dd/yy hh:mm''" , including the embedded space) .I clip will read articles since that date. With no arguments, it examines the user's ~/.lastclip file to know when it left off scanning; if no such file exists, every news article will be scanned. .PP Patterns and options are specified in a .cliprc file situated in your home directory. The .cliprc file is simply a gob of Perl code that will be evaluated by clip after it has set its default options but before it actually goes out to do anything. The only mandatory items are a call to each of two subroutines: &NGSKIP and &SCANNER. A typical .cliprc file looks like this: .nf .ne 6 # Options I want to override. $MAXLOAD = 4; # Newsgroups I always read anyway, so don't bother. &NGSKIP( <<'END' ); &skip if /comp\e.lang\e.perl/; &skip if /rec\e.humor\e.funny/; END .ne 8 # Patterns I'm interested in scanning for. &SCANNER( <<'END' ); &hit if /betty[^\e0]boop/i; &hit if /roger[^\e0]rabbit/i; &hit if /\ebw.*coyote\eb/i; if (/\ebacme\eb/i) {{ next if $` =~ /roadrunner\e@$/; &hit; }} if (/\eblooney\eb/i) {{ next if $' =~ /^\es*bin/; next if $` =~ /!$/; next if $` =~ /fudd\e@/i; next if $nglist =~ /alt\e.crazy\e.people/; &hit; }} END .fi The argument to the &NGSKIP routine is a sequence of zero or more Perl statements that call &skip if the current article is crossposted to a newsgroup that we don't want to scan (generally because we'll read the newsgroup anyway). Before your code is called, the $_ variable is automatically set to the list of newsgroups from the history file. You can invert the logic and just pick the newsgroups you want by saying: .nf .ne 9 &NGSKIP(<<'END'); study; { last if /comp\e.lang\e.perl/; last if /comp\e.org\e.usenix/; last if /comp\e.unix\e.bsd/; &skip; } END .fi .PP The argument to the &SCANNER routine is a sequence of one or more Perl statements that call &hit if an interesting pattern has been spotted. Before your code is called, the $_ variable is set to the current paragraph of the current article (articles are read and scanned paragraph by paragraph). Newsclip will snip excerpts from any paragraph containing patterns, underlining the patterns it found. Note that some patterns are unqualified hits, while others are hits only if some other pattern doesn't match. (It's important to the underlining algorithm that the value of $& continue to contain the value of the last .I successful match \(em all of the exceptions are expected to fail.) For the purposes of exception scanning, the variables $` and $' are automatically set to the preceding and subsequent text by the successful initial pattern match. In addition, .I clip sets the variable $nglist to the current list of newsgroups, and the variable $header to be the header of the current article. .PP The use of double curlies is merely to allow the ``next'' command to fall through to the next test, since ``next'' will simply fall out of an ordinary block. The ``last'' command could also have been used, but would be less intuitive. Other special thingies you might want to use include \eb to assert a word boundary, [^\e0] to match any character but a null, including a newline, and any other regular expression goodies you can think up to detect spelling errors and variants. Tom Christiansen searches for his name with: .nf .ne 4 if (/\ebtchrist\eb/i || /tom[^\e0]christ(ia|e)ns[eo]n/i) {{ # grr... next if /\en\es*Tom Christiansen\es+tchrist\e@convex.com/; &hit; }} .fi .PP .I Newsclip forks another copy of itself, called .IR pmeter , which runs .M uptime 1 every two minutes to make sure the load hasn't gone too high. If it has, .I pmeter suspends .I clip with a SIGSTOP, waiting until the load average goes back down before allowing .I clip to continue. Both .I pmeter and .I clip continually muck with their own externally-visible argument list to keep folks running .M ps 1 amused. This has no effect if your operating system doesn't support changes to argv being noticed by ps. .PP Output by default goes to ~/tmp/n0. When .I clip is hit by a SIGHUP, it renames n8 to n9, n7 to n8, ..., n0 to n1, and creates a new n0 file. For handiness, when .I clip starts up, it creates a tiny script called ``N'' that will send the SIGHUP to the correct process. Typically one runs N in the morning to set up a new n0 file, and then reads ~/tmp/n1 to see what the scanner found since the last time you ran N. A companion script, named ``P'', runs .I ps on the .I clip and .I pmeter processes. (This assumes a BSD-style .I ps program.) .PP The first line of each article reference begins with an @, in case you want to cut and paste that line to something that looks up the article for you. I use the following rn macro. Unfortunately I have to paste the line twice because of how rn eats typeahead, but hey, it beats a kick in the head. This macro should all be on one line \(em it's broken in two so that nroff doesn't get upset: .nf .ne 12 @ %(%m=n?:%(%m!=a?q:)q)%(%"^Jng/art: "=\e([^ @]*\e)/\e([^ ]*\e)? g%`perl -e '($_="%1")=~tr#/#.#;print'`^J.%2^J:%(%m=n?^L:)) .fi .PP Interesting variables to set in your .cliprc file include the following: .TP 15 .B $SPOOLDIR Where news articles are stored; defaults to /usr/spool/news. .TP .B $NEWSLIB Where Cnews keeps its history file; defaults to /usr/lib/news. .TP .B $HOMETMP Where clip should put its output files; defaults to ~/tmp. .TP .B $HOMEBIN Where clip should install N script; defaults to ~/bin. .TP .B $MAXLOAD What load average to suspend at; defaults to 3. .TP .B $NICE What priority to run at; defaults to 16. .TP .B $DEBUG Whether to be noisy; defaults to 0. A value with the 1 bit set causes .I clip to be noisy; a value with the 2 bit set causes .I pmeter to be noisy. .SH ENVIRONMENT HOME or LOGDIR .SH SIGNALS .nf SIGHUP Cycle log files. SIGUSR1 Re-exec oneself in case .cliprc changes. SIGALRM Make clip wake up early. .fi .SH FILES .nf .ta \w'$HOME/.cliprc 'u $HOME/.cliprc Your clipping preferences. $HOME/.lastclip Date and time of last article scanned. $HOMETMP/n[0-9] Log files. $HOMEBIN/N Cycle log files: n8 -> n9, n7 -> n8, etc. .fi .SH AUTHOR Larry Wall, with heckling by Tom Christiansen. .SH "SEE ALSO" .M perl 1 , .M rn 1 , .M ps 1 , .M uptime 1 , .M newsmaint 8 . .SH DIAGNOSTICS Obscure diagnostics are an obsolete concept rooted in the notion that programs must be shoehorned into memory. .SH BUGS There ought to be a way for multiple users to share a single clip process. .PP It doesn't work on NNTP-only systems. .ex