Uzanto:SistMesEspigRoboto/fontokodo
Fontokodo de la roboto
Atentigo: ĉi-sube aperas "</pre__FORIGU__>". Ĝi devus esti </pre>, sed ne facilas montri tion en la fontokodo, kiu mem estas inter <pre> kaj </pre>. Do vi devos ŝanĝi ĝin.
Por mem uzi la programon, vi devus ŝanĝi ankaŭ la uzantonomon (serĉu "userid") kaj la pasvorton (serĉu "password"). Ŝanĝu ankaŭ la proxy-adreson (mi forgesis la esperantan vorton por tio...) - ĝi estu en la formato http://host:port/ - aŭ, laŭ bezono, simple forigu la ĉi-suban linion pri ĝi ("$ua->proxy('http',$proxy)").
#!/usr/bin/perl
use strict;
use LWP;
use LWP::UserAgent;
use HTML::Form;
use Encode;
#-----------------------------------------------------------
# configuration
# HTTP proxy handed to the user agent below (format: http://host:port/).
my $proxy = '<DETALOJ KAŜITAJ>';
# Every wiki URL used by the script is this stem plus a page title.
my $url_stem='http://eo.wikipedia.org/w/index.php?title=';
my $login_form_url=$url_stem.'Speciala:Userlogin';
# page name is for edit summary; URL may contain UTF8 instead of "x"s
my $input_page_name='Vikipedio:Sxangxendaj sistemaj mesagxoj';
my $input_page_url=$url_stem.'Vikipedio:%C5%9Can%C4%9Dendaj_sistemaj_mesa%C4%9Doj';
# report page
my $report_page_name='Vikipediisto:SistMesEspigRoboto/raporto';
my $report_page_url=$url_stem.'Vikipediisto:SistMesEspigRoboto/raporto';
# Account the robot logs in with.
my $userid='SistMesEspigRoboto';
my $password='<DETALOJ KAŜITAJ>';
# Scratch variables reused by every step of the script.
my ($response, $form, $content);
#-----------------------------------------------------------
# initialise
# This is file-level code, so a failure has to abort with die: "return"
# is only legal inside a subroutine.
my $ua = LWP::UserAgent->new() or die "no lwp";
# An in-memory cookie jar keeps the login session for later requests.
$ua->cookie_jar({});
$ua->proxy('http',$proxy);
#-----------------------------------------------------------
# log in
print "Logging in\n";
$response = $ua->get($login_form_url);
$form = find_form ($response, 'name', 'userlogin');
$form->param('wpName',$userid);
$form->param('wpPassword',$password);
$response = $ua->request($form->click());
# The login counts as successful when the returned page announces our
# user name.  \Q...\E makes the name match literally even if it contains
# regex metacharacters.
die "login failed" unless $response->content =~ /var wgUserName = "\Q$userid\E"/;
#-----------------------------------------------------------
# read input page:
print "Reading input page\n";
$response = $ua->get($input_page_url . '&action=edit');
# The wikitext is taken from the edit box of the edit form; the form
# object stays in $form afterwards.
$form    = find_form($response, 'name', 'editform');
$content = $form->param('wpTextbox1');
# Split the wikitext into the requested pages and the remaining page text.
my ($page_titles_ref, $page_contents_ref, $newcontent) = parse_input_page($content);
my @page_titles   = @{$page_titles_ref};
my @page_contents = @{$page_contents_ref};
# The revision ID is published in a script variable of the edit page.
my ($id) = $response->content =~ /var wgCurRevisionId = "([0-9]+)";/;
die "can't get revision ID" unless defined $id;
# Fetch the "raw" text of that same revision and parse it as well.
# REASON: this will give us a version with the "x"s etc substituted
$response = $ua->get($input_page_url . "&oldid=$id&action=raw");
$content  = $response->content;
my ($raw_page_titles_ref) = parse_input_page($content);
my @raw_page_titles = @{$raw_page_titles_ref};
# Sanity check: both parses must have found the same number of requests.
my $num_edits = @page_titles;
if ($num_edits != @raw_page_titles) {
    die "mismatch between raw and edit-box page titles";
}
#-----------------------------------------------------------
print "There are $num_edits edits to do.\n";
if ($num_edits == 0) {
    exit;
}
# Save the input page first, using the text left over after parsing.
print "Writing edited input page\n";
$form->param('wpTextbox1', $newcontent);
$form->param('wpSummary', "Roboto akceptis $num_edits petitajn redaktojn");
# uncomment if edit should be marked as minor - see also below
#$form->param('wpMinoredit',1);
utf8_encode($form);
$response = $ua->request($form->click());
#-----------------------------------------------------------
# Then save every requested page.  The URL is built from the title found
# in the raw text; the new page text comes from the edit-box parse.
for my $edit_no (0 .. $#page_titles) {
    my $target_title = $raw_page_titles[$edit_no];
    my $edit_url     = $url_stem . $target_title . '&action=edit';
    print "Editing $target_title:\n";
    print " getting edit form\n";
    $response = $ua->get($edit_url);
    $form     = find_form($response, 'name', 'editform');
    $form->param('wpTextbox1', $page_contents[$edit_no]);
    $form->param('wpSummary', "Roboto kopias enhavon el [[$input_page_name]]");
    #$form->param('wpMinoredit',1);
    print " saving\n";
    utf8_encode($form);
    $response = $ua->request($form->click());
    #sleep(10);
}
# and now append to the report page...
print "Writing report:\n";
# Timestamp (from gmtime) used in the report heading and edit summary.
my @date = gmtime(time);
my $date_string = sprintf("%04d-%02d-%02d %02d:%02d:%02d",
    $date[5]+1900,$date[4]+1,$date[3],$date[2],$date[1],$date[0]);
print " getting edit form\n";
$response = $ua->get($report_page_url.'&action=edit');
$form = find_form ($response, 'name', 'editform' );
# Start from the current report text and append a new section to it.
my $report_page_contents = $form->param('wpTextbox1');
$report_page_contents .= <<EOF;
=== Redaktoj je $date_string ===
La roboto redaktis la jena(j)n pagxo(j)n je proksimume $date_string.
<table border="1" cellpadding="5" cellspacing="0">
<tr><th>Pagxo</th><th>Vikiteksto</th><th>Aperas kiel</th></tr>
EOF
# One table row per edited page: a link to the page, its wikitext shown
# verbatim, and the same wikitext left to be rendered by the wiki.
# NOTE: the closing tag of the verbatim cell is written with a
# "__FORIGU__" marker; the usage notes at the top of the file say that
# this marker has to be removed by hand before the program is used.
for my $seq (0 .. $#page_titles) {
    # Title as written in the edit box; it must be interpolated as
    # $page_title (an undeclared variable would not compile under strict).
    my $page_title = $page_titles[$seq];
    $report_page_contents .= <<EOF;
<tr>
<td>
[[$page_title]]
</td>
<td>
<nowiki><pre></nowiki>
$page_contents[$seq]
<nowiki></pre__FORIGU__></nowiki>
</td>
<td>
$page_contents[$seq]
</td>
</tr>
EOF
}
$report_page_contents .= <<EOF;
</table>
EOF
$form->param('wpTextbox1',$report_page_contents);
$form->param('wpSummary',"Roboto aldonas raporton je $date_string");
#$form->param('wpMinoredit',1);
print " saving\n";
utf8_encode($form);
$response = $ua->request($form->click());
print "Done\n";
#================================================
# subroutines
# utf8_encode($form)
#
# UTF-8 encode the values of all parameters of a form, in place.
#
#   $form   - form object offering param() to list names and to get/set
#             the values of one name (an HTML::Form in this script)
#
# Returns the same form object.
sub utf8_encode
{
    my $form = shift;
    for my $name ($form->param) {
        # Read in list context so that a parameter carrying several values
        # keeps all of them; reading only one value and writing it back
        # would drop the others.
        my @values = $form->param($name);
        # No values to encode.  Skipping also avoids calling
        # param($name) with an empty list, which would be a read, not a
        # write.
        next unless @values;
        $form->param($name, map { encode("utf8", $_) } @values);
    }
    return $form;
}
# find_form($document, $name, $val)
#
# Parse the forms out of $document with HTML::Form->parse and return the
# single form whose attribute $name equals $val.
# Dies unless exactly one form matches.
sub find_form
{
    my ($document, $name, $val) = @_;
    my @candidates;
    for my $candidate (HTML::Form->parse($document)) {
        push @candidates, $candidate if $candidate->attr($name) eq $val;
    }
    unless (@candidates == 1) {
        die "There is not exactly one form whose attribute $name is $val";
    }
    return $candidates[0];
}
# parse_input_page($content)
#
# Split the wikitext of the input page into the pages it requests and the
# text that remains on the input page.
#
# A request starts with a line of the form  :@%[[Title]]  and ends with a
# line  :@%fino  (trailing whitespace allowed on both).  The lines between
# the two markers are the new content of the page "Title".
#
# Returns a list of three items:
#   - reference to the array of requested page titles
#   - reference to the array of page contents, in the same order
#   - the input page text with all complete requests removed; always a
#     defined string, '' when nothing remains
#
# A request that is still open at the end of the text is not accepted: its
# content lines (but not its start-marker line) are appended to the
# remaining text.
sub parse_input_page
{
    my $content = shift;
    # Treat a missing text like an empty page.
    $content = '' unless defined $content;
    my @page_titles   = ();
    my @page_contents = ();
    # Start from '' so that a page consisting only of requests yields an
    # empty string rather than undef.
    my $newcontent = '';
    my $page_title;     # defined only while inside a request
    my $page_content;
    # parse the page into required pages
    for my $line (split /\n/, $content) {
        if (defined $page_title) {
            if ($line =~ /^:\@%fino\s*$/) {
                # end of required page
                push @page_titles, $page_title;
                push @page_contents, $page_content;
                $page_title = undef;
            } else {
                # in required page
                $page_content .= "$line\n";
            }
        } elsif ($line =~ /^:\@%\[\[(.*)\]\]\s*$/) {
            # start of required page
            $page_title   = $1;
            $page_content = '';
        } else {
            # outside required page
            $newcontent .= "$line\n";
        }
    }
    if (defined $page_title) {
        # unterminated output page content at end of processing gets
        # shoved back onto end of input page
        $newcontent .= $page_content;
    }
    return (\@page_titles, \@page_contents, $newcontent);
}