Difference between revisions of "Caretaker.pl"
(not legacy) |
(not needed in sub anymore cos not called by wikid.pl) |
||
| Line 10: | Line 10: | ||
} | } | ||
| − | + | my $dbh = DBI->connect('DBI:mysql:od',lc $::peer,$::pwd1) or die DBI->errstr; | |
| − | + | my @ns = ('','Talk:','User:','User talk:','Project:','Project talk:', | |
| − | + | 'Image:','Image talk:','Mediawiki:','Mediawiki talk:','Template:', | |
| − | + | 'Template talk:','Help:','Help talk:','Category:','Category talk:'); | |
| − | |||
| − | + | # Get id of first article after mediawiki built in articles | |
| − | + | my $sth = $dbh->prepare('SELECT cur_id FROM cur WHERE cur_title = "Zhconversiontable"'); | |
| − | + | $sth->execute(); | |
| − | + | my @row = $sth->fetchrow_array; | |
| − | + | $sth->finish; | |
| − | + | my $first = $row[0]+1; | |
| − | + | # Get last article id | |
| − | + | my $sth = $dbh->prepare('SELECT cur_id FROM cur ORDER BY cur_id DESC'); | |
| − | $sth->execute(); | + | $sth->execute(); |
| + | @row = $sth->fetchrow_array; | ||
| + | $sth->finish; | ||
| + | my $last = $row[0]; | ||
| + | |||
| + | # Loop through all articles one per second | ||
| + | my $sth = $dbh->prepare('SELECT cur_namespace,cur_title,cur_is_redirect FROM cur WHERE cur_id=?'); | ||
| + | my $done = 'none'; | ||
| + | for ($first..$last) { | ||
| + | $sth->execute($_); | ||
@row = $sth->fetchrow_array; | @row = $sth->fetchrow_array; | ||
| − | + | my @comments = (); | |
| − | my $ | + | my $title = $ns[$row[0]].$row[1]; |
| + | if ($title && ($row[2] == 0)) { | ||
| − | + | # Read the article content | |
| − | + | $::article = wikiRawPage $::wiki, $title; | |
| − | + | $::article =~ s/^\s+//; | |
| − | + | $::article =~ s/\s+$//; | |
| − | $ | + | my $backup = $::article; |
| − | + | my $isCode = $title =~ /\.(as|c|cpp|c\+\+|h|sh|css|html?|pl|php|R|tex|xml|xslt)$/i; | |
| − | my | ||
| − | |||
| − | |||
| − | + | # --------------------------------------------------------------------------------------------------------------------- # | |
| − | |||
| − | |||
| − | |||
| − | |||
| − | |||
| − | + | if (!$isCode) { | |
| − | if ( | + | # Format catlinks and normal links |
| + | my $tmp = $::article; | ||
| + | my @links = $::article =~ /\[{2}category:([^\]]+)\]{2}/ig; | ||
| + | my %links = (); $links{$_} = 1 for @links; | ||
| + | my $cats = ''; | ||
| + | if ($title =~ /^[a-z0-9-]+\.(com?|edu|govt?|net|org|mil|info|school|iwi|ac|geek|aero|biz|coop|museum|name|pro)(\.[a-z]{2})?$/i) { | ||
| + | push @comments, 'cat in [[Category:Domain names|Domain names]]' | ||
| + | unless $links{'Domain names'}++; | ||
| + | } | ||
| + | if ($::article =~ /<\/math>/) { | ||
| + | push @comments, 'cat in [[Category:Articles containing maths|Maths]]' | ||
| + | unless $links{'Articles containing maths'}++; | ||
| + | } | ||
| + | @links = sort keys %links; | ||
| + | if ($#links >= 0) { | ||
| + | for (@links) { tr/_/ /; $cats .= "[[Category:$_]]"; } | ||
| + | $::article =~ s/\s*\[{2}\s*category:[^\]]+\s*\]{2} *//sig; | ||
| + | $::article =~ s/^\s+//; | ||
| + | $::article =~ s/\s+$//; | ||
| + | $::article = "$cats\n$::article"; | ||
| + | push @comments, 'categories' if $::article ne $tmp; | ||
| + | } | ||
| − | + | # Format normal links | |
| − | + | $tmp = $::article; | |
| − | + | $::article =~ s/(\[{2}[^\]\r\n]+\]{2})/formatLink($1)/eg; | |
| − | + | push @comments, 'links' if $::article ne $tmp; | |
| − | |||
| − | |||
| − | |||
| − | |||
| − | |||
| − | |||
| − | |||
| − | |||
| − | |||
| − | |||
| − | |||
| − | |||
| − | |||
| − | |||
| − | |||
| − | |||
| − | |||
| − | |||
| − | + | # Format headings | |
| − | + | $tmp = $::article; | |
| − | + | my $min = 10; | |
| − | + | for ($::article =~ /^(=+) *.+? *=+ *$/mg) { $min = length if $min > length } | |
| − | + | $::article =~ s/[\r\n]+ *={$min}(=*) *([^=\r\n]+?) *=+ *[\r\n]+/\n\n$1 $2 $1\n/g if --$min; | |
| − | + | push @comments, 'headings' if $::article ne $tmp; | |
| − | + | } | |
| − | |||
| − | |||
| − | |||
| − | |||
| − | |||
| − | + | # Common typo's - put square brackets around first letter so it doesn't get caretaken itself! | |
| − | + | my $typos | |
| − | + | = $::article =~ s/[i]mpliment/implement/g | |
| − | + | + $::article =~ s/[d]ependance/dependence/g | |
| − | + | + $::article =~ s/[h]ardwire/hard-wire/g; | |
| − | + | push @comments, ($typos == 1 ? 'typo' : 'typos') if $typos; | |
| − | + | # Apply rules to Xml:Articles | |
| − | + | if ($title =~ /^Xml:/) { | |
| − | + | # Remove empty elements | |
| − | + | push @comments, 'removed empty elements' if $::article =~ s/^\s*<(read|write|category|init|data|view|edit|save)>\s*<\/\w+>[\r\n]*//gms; | |
| − | + | } | |
| − | + | # --------------------------------------------------------------------------------------------------------------------- # | |
| − | + | # If article changed, write and comment | |
| − | + | $::article =~ s/^\s+//; | |
| − | + | $::article =~ s/\s+$//; | |
| − | + | if ($::article ne $backup) { | |
| − | + | wikiPageEdit $::wiki, $title, $::article, '[[talk:caretaker.pl|Caretaker]]: '.join(', ',@comments), 1; | |
| − | + | $done = $done + 1; | |
| − | |||
} | } | ||
| − | |||
} | } | ||
| + | sleep(1); | ||
| + | } | ||
| − | + | my $comment = "Daily [[talk:caretaker.pl|caretaking]] tasks completed: ".($last-$first)." articles examined, ".($done-1)." adjusted."; | |
| − | + | logAdd $comment; | |
| − | + | wikiPageAppend($::wiki, $::wikilog, "\n*".localtime()." : $comment", $comment); | |
| − | + | $sth->finish; | |
| − | + | $dbh->disconnect; | |
| − | |||
Revision as of 22:48, 11 July 2008
#!/usr/bin/perl
# Our Perl scripts.
use DBI;
# Tidy a single wiki link such as "[[ Some_page | label ]]".
#   - every underscore becomes a space
#   - whitespace touching a '[', ']' or '|' is removed
# Takes the link text as its only argument and returns the tidied copy;
# the caller's string is not modified.
sub formatLink {
    my ($text) = @_;

    # Underscores and spaces are interchangeable in wiki titles; prefer spaces
    $text =~ tr/_/ /;

    # Squeeze out padding around the link delimiters
    $text =~ s/\s*([\[\]\|])\s*/$1/g;

    return $text;
}
# Caretaker main script.
#
# Walks the `cur` table from the first id after the MediaWiki built-in
# articles up to the highest id, one id per second.  Every non-redirect
# article is read, tidied (category links, link formatting, headings, common
# typos, empty Xml: elements) and written back with an edit summary when the
# text changed.  A one-line report is then logged and appended to the wiki log.
#
# Reads the globals $::peer, $::pwd1, $::wiki and $::wikilog, uses $::article
# as the working copy of the current article, and calls wikiRawPage,
# wikiPageEdit, wikiPageAppend and logAdd, which are defined outside this
# script.

my $dbh = DBI->connect('DBI:mysql:od', lc $::peer, $::pwd1) or die DBI->errstr;

# Title prefix for each namespace id stored in cur_namespace (0 = main, no prefix)
my @ns = (
    '',                'Talk:',
    'User:',           'User talk:',
    'Project:',        'Project talk:',
    'Image:',          'Image talk:',
    'Mediawiki:',      'Mediawiki talk:',
    'Template:',       'Template talk:',
    'Help:',           'Help talk:',
    'Category:',       'Category talk:',
);

# Get id of first article after mediawiki built in articles
my $sth = $dbh->prepare('SELECT cur_id FROM cur WHERE cur_title = "Zhconversiontable"');
$sth->execute();
my @row = $sth->fetchrow_array;
$sth->finish;
my $first = (defined $row[0] ? $row[0] : 0) + 1;

# Get last article id (only the top row is needed, so don't fetch the rest)
$sth = $dbh->prepare('SELECT cur_id FROM cur ORDER BY cur_id DESC LIMIT 1');
$sth->execute();
@row = $sth->fetchrow_array;
$sth->finish;
my $last = defined $row[0] ? $row[0] : 0;

# Loop through all articles one per second
$sth = $dbh->prepare('SELECT cur_namespace,cur_title,cur_is_redirect FROM cur WHERE cur_id=?');
my $examined = 0;    # articles actually read
my $done     = 0;    # articles written back
for my $id ($first .. $last) {
    $sth->execute($id);
    my ($namespace, $name, $redirect) = $sth->fetchrow_array;

    # Skip ids with no row, redirects, and namespaces we have no prefix for
    # (an unknown namespace would otherwise be treated as the main namespace
    # and the wrong page would be edited)
    next unless defined $namespace && defined $name;
    next if $redirect;
    next if $namespace < 0 || $namespace > $#ns;

    my $title = $ns[$namespace] . $name;
    next if $title eq '';

    my @comments = ();
    $examined++;

    # Read the article content
    $::article = wikiRawPage($::wiki, $title);
    $::article =~ s/^\s+//;
    $::article =~ s/\s+$//;
    my $backup = $::article;
    my $isCode = $title =~ /\.(as|c|cpp|c\+\+|h|sh|css|html?|pl|php|R|tex|xml|xslt)$/i;

    # ------------------------------------------------------------------------------------------------------------- #

    if (!$isCode) {

        # Format catlinks: collect every category, then re-emit them sorted at the top.
        # The capture pattern tolerates whitespace after "[[" just like the removal
        # pattern below, so nothing is removed without having been collected first.
        my $tmp   = $::article;
        my @found = $::article =~ /\[{2}\s*category:([^\]]+)\]{2}/ig;
        my %links = ();
        for my $cat (@found) {
            # Normalise before keying so "Foo_bar", "Foo bar" and "Foo bar " are one category
            $cat =~ tr/_/ /;
            $cat =~ s/^\s+//;
            $cat =~ s/\s+$//;
            $links{$cat} = 1 if $cat ne '';
        }

        # Titles that look like domain names get categorised as such
        if ($title =~ /^[a-z0-9-]+\.(com?|edu|govt?|net|org|mil|info|school|iwi|ac|geek|aero|biz|coop|museum|name|pro)(\.[a-z]{2})?$/i) {
            push @comments, 'cat in [[Category:Domain names|Domain names]]'
                unless $links{'Domain names'}++;
        }

        # Articles containing a closing math tag get the maths category
        if ($::article =~ /<\/math>/) {
            push @comments, 'cat in [[Category:Articles containing maths|Maths]]'
                unless $links{'Articles containing maths'}++;
        }

        my @links = sort keys %links;
        if (@links) {
            my $cats = join '', map { "[[Category:$_]]" } @links;
            $::article =~ s/\s*\[{2}\s*category:[^\]]+\s*\]{2} *//sig;
            $::article =~ s/^\s+//;
            $::article =~ s/\s+$//;
            $::article = "$cats\n$::article";
            push @comments, 'categories' if $::article ne $tmp;
        }

        # Format normal links
        $tmp = $::article;
        $::article =~ s/(\[{2}[^\]\r\n]+\]{2})/formatLink($1)/eg;
        push @comments, 'links' if $::article ne $tmp;

        # Format headings
        $tmp = $::article;
        my $min = 10;
        # $min becomes the length of the shortest run of leading '=' on any heading line
        for ($::article =~ /^(=+) *.+? *=+ *$/mg) { $min = length if $min > length }
        # One less than the shortest run is stripped from every heading, so the
        # shallowest heading ends up with a single '='.  Nothing to do when the
        # shallowest heading already has one '=' ($min reaches 0).
        $min--;
        if ($min) {
            $::article =~ s/[\r\n]+ *={$min}(=*) *([^=\r\n]+?) *=+ *[\r\n]+/\n\n$1 $2 $1\n/g;
        }
        push @comments, 'headings' if $::article ne $tmp;
    }

    # Common typo's - put square brackets around first letter so it doesn't get caretaken itself!
    my $typos = 0;
    $typos += $::article =~ s/[i]mpliment/implement/g;
    $typos += $::article =~ s/[d]ependance/dependence/g;
    $typos += $::article =~ s/[h]ardwire/hard-wire/g;
    push @comments, ($typos == 1 ? 'typo' : 'typos') if $typos;

    # Apply rules to Xml:Articles
    if ($title =~ /^Xml:/) {

        # Remove empty elements
        push @comments, 'removed empty elements'
            if $::article =~ s/^\s*<(read|write|category|init|data|view|edit|save)>\s*<\/\w+>[\r\n]*//gms;
    }

    # ------------------------------------------------------------------------------------------------------------- #

    # If article changed, write and comment
    $::article =~ s/^\s+//;
    $::article =~ s/\s+$//;
    if ($::article ne $backup) {
        wikiPageEdit($::wiki, $title, $::article, '[[talk:caretaker.pl|Caretaker]]: ' . join(', ', @comments), 1);
        $done++;
    }
}
continue {
    # Runs after every id, including those skipped with "next", so the
    # one-id-per-second pacing is kept
    sleep(1);
}

my $comment = "Daily [[talk:caretaker.pl|caretaking]] tasks completed: $examined articles examined, $done adjusted.";
logAdd($comment);
wikiPageAppend($::wiki, $::wikilog, "\n*" . localtime() . " : $comment", $comment);
$sth->finish;
$dbh->disconnect;



