#! /usr/bin/perl # working version, not documented # news lent script (rss) use XML::Parser::Lite; use Socket; use Encode 'from_to'; my $home="./dir"; my $temp="."; my $xmlname="$home/newz"; my $htmlname="$home/newz"; $|=1; my $proxy='195.50.2.154:8080'; my $html_enc = 'windows-1251'; my %cont=( 'mahatma.bspu.unibel.by'=>1, 'www.bspu.unibel.by'=>1 ); sub add_cont{ my $l=$_[0].'/'; my $x; $l=~s/http\:\/\/(.*?)[\:\/]/$x=$1;''/gsei; $cont{$x}=1; } sub esc{ local $1; my $x=shift; $x=~s/([\x00-\x1f,:\"\'\\\/])/sprintf('%%%02X',ord($1))/eg; $x; } sub unesc{ my $x=shift; local $1; $x=~s/%([0-9A-Fa-f]{2})/chr(hex($1))/eg; $x; } sub get_xml{ my ($s,%h,@a,@a1,@ad); @a=split(/:\/\//,$_[0],2); unshift @a,'http' if(!defined($a[1])); @a[1,3]=split(/\//,$a[1],2); @ad=@a[1,2]=split(/:/,$a[1],2); @a1=@a; $ad[1]||=80; $a1[0]&&="$a[0]://"; $a1[2]&&=":$a[2]"; $a1[3]&&="/$a[3]"; if($proxy){ @ad=split(/:/,$proxy,2); $a1[3]=join('',@a1); } socket(SO,PF_INET,SOCK_STREAM,PROTO_TCP)&& connect(SO,sockaddr_in($ad[1],inet_aton($ad[0])))||return; select(SO);$|=1;select(STDOUT); print SO qq($_[1] $a1[3] HTTP/1.1 Host: $a1[1] User-Agent: robot Accept: text/xml,application/xml,application/xhtml+xml,text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5 Accept-Language: ru,be;q=0.8,en-us;q=0.5,en;q=0.3 Accept-Encoding: none $_[2]Connection: close ); while((!eof(SO))&&defined(my $x=)){$s.=$x;$x=~s/[\r\n]*//gs;$x||last} $s=~s/(.*?): (.*?)[\r]\n/$h{lc($1)}=$2/gise; undef $s; while((!eof(SO))&&defined(my $x=)){$s.=$x} close(SO); substr($s,index($s,'$t" } my (@block,%item,@items); ## 'id'=>[start,char,end,start1,char1.end1]; my %blocks=( 'rss.channel.item'=>[ sub{%item=()}, undef, sub{push @items,{%item};undef %item}, undef, sub{shift;$item{$block[$#block]}=join('',@_)}, undef ], 'rss.channel.item.link'=>[ ] ); #$blocks{'rdf:RDF.item'}=$blocks{'rss.channel.item'}; my %handlers=( Start=>sub{parser_event(3,@_);push @block,$_[1];unshift @_,0;goto &parser_event}, Char=>sub{parser_event(1,@_);unshift @_,4;goto &parser_event}, End=>sub{parser_event(2,@_);while($_[1] ne pop @block){};unshift @_,5;goto &parser_event}, ); sub parser_event{ my $e=shift; my $id=join('.',@block[0..$#block-($e>3)]); #print "$id\n"; if(exists($blocks{$id})){ my $h=$blocks{$id}; goto ref($h)||return; HASH:return $h->{('Start','Char','End')[$e]}(@_); ARRAY:return defined(@$h[$e])?&{@$h[$e]}(@_):undef; SCALAR:return; } } sub get_rss{ my ($x,%h,$ff,$ffb,$t,$p); if(substr($_[0],0,7) eq 'file://'){ $ff=substr($_[0],7); return if(!-e $ff); goto FILE; } add_cont($_[0]); #print "get $_[0]\n"; ($x,%h)=get_xml($_[0],'HEAD'); return if(!defined(%h)); $h{'last-modified'}||=gmtime; if(exists($h{'last-modified'})){ $ffb="$temp/newz-".esc($_[0]); stat($ff="$ffb.".esc($h{'last-modified'})); if(-e _){ FILE: open FF,"<$ff" or die "$!"; sysread(FF,$x,-s FF); close(FF); }else{ ($x,%h)=get_xml($_[0],'GET'); return if(!defined($x)); while(my $d=<$ffb.*>){unlink($d);} open FF,">$ff" or die "$! $ff"; print FF $x; close(FF); } }else{($x,%h)=get_xml($_[0],'GET'); $h{'last-modified'}.=localtime; } $p=new XML::Parser::Lite; $p->setHandlers(%handlers); my ($enc,@a); $x=~s/\<\?xml(.*?)\>/push @a,$1;"\<\?xml$1\>"/ges; for(@a){$_=~s/ encoding\=\"(.*?)\"/$enc=lc($1);''/ges} from_to($x,$enc,'utf-8') if(defined($enc) && $enc ne 'utf-8'); $p->parse($x); } my %htm=( 'lt'=>'<', 'gt'=>'>', 'amp'=>'&', 'quot'=>'"' ); sub dehtml{ my $s=shift; $s=~s/\&(.*?)\;/$htm{$1}||"&$1;"/gse; $s } my (@news0,%news,%nh); my %mm=('Jan'=>0,'Feb'=>1,'Mar'=>2,'Apr'=>3,'May'=>4,'Jun'=>5,'Jul'=>6,'Aug'=>7,'Sep'=>8,'Oct'=>9,'Nov'=>10,'Dec'=>11); sub addnews{ my ($l,$x); my $u=$_[0]; for(my $i=$#items;$i>=0;$i--){ $l=$items[$i]->{link}; if($u){ $x=$l.'&'; $x=~s/[\&\?\;]$u\=(.*?)\&/$l=dehtml($1);''/gse; $l="http://$l" if(index($l,'://')==-1); $items[$i]->{xlink}=$l; }; if(!exists($nh{$l})){ $nh{$l}=$items[$i]; my $t=$items[$i]->{pubDate}; my $t1=0; $t=~s/([0-9]{2})\:([0-9]{2})\:([0-9]{2})/$t1=$3+($2+$1*60)*60;''/e; $t=~s/([0-9]{1,2}) ([a-zA-Z]{3}) ([0-9]{4})/$t1+=($1+$mm{$2}*31+$3*365)*24*60*60;''/e; $t=~s/\+0([0-9])00/$t1-=$1*60*60;''/ex; $t=~s/\-0([0-9])00/$t1+=$1*60*60;''/ex; $news{$t1}=$items[$i]; add_cont($l); } } undef @items; } my $time=gmtime; #print qq(Content-type: text/html; codepage=utf-8 #Last-modified: $time # my $time1=$time; $time1=~s/ / /gs; my $fh=qq(Breaking News!
Open Source News$time1
). url("dir/")."

Contributors:
"; for(sort keys %cont){ $fh.=url("http://".unesc($_)).'
'; } from_to($fh,'utf-8',$html_enc); open FH,">$htmlname.tmp.html" or die "$! $htmlname.tmp.html"; print FH $fh; close(FH); print FH "\n\n"; close(FH); if($ARGV[0] eq 'rotate'){ for('xml','html'){ my $x=eval("\$".$_."name"); my $f0="$x.$_"; my $f1="$x.".quotemeta($time).".$_"; `mv -f $f0 $f1` if(-e $f0); } } `mv -f $htmlname.tmp.html $htmlname.html`;