Difference between revisions of "Wiki.pl"

From Organic Design
Jump to: navigation, search
m
(fix domain issue)
Line 1: Line 1:
 
#!/usr/bin/perl
 
#!/usr/bin/perl
# - based on wiki.pl
+
# - based on wiki.pl{{perl}}{{Category:Robots}}
 
# - Licenced under LGPL (http://www.gnu.org/copyleft/lesser.html)
 
# - Licenced under LGPL (http://www.gnu.org/copyleft/lesser.html)
 
# - Authors: [http://www.organicdesign.co.nz/Nad Nad], [http://www.organicdesign.co.nz/Sven Sven]
 
# - Authors: [http://www.organicdesign.co.nz/Nad Nad], [http://www.organicdesign.co.nz/Sven Sven]
Line 9: Line 9:
 
# http://www.mediawiki.org/wiki/Manual:Parameters_to_index.php#What_to_do
 
# http://www.mediawiki.org/wiki/Manual:Parameters_to_index.php#What_to_do
 
# - Provides details on required name fields in forms
 
# - Provides details on required name fields in forms
+
 
 
# NOTES REGARDING CHANGING TO PERL PACKAGE AND ADDING API SUPPORT
 
# NOTES REGARDING CHANGING TO PERL PACKAGE AND ADDING API SUPPORT
 
# - constructor:
 
# - constructor:
Line 16: Line 16:
 
#  - get namespaces
 
#  - get namespaces
 
#  - get messages used in patterns (and make methods use messages in their regexp's so lang-independent)
 
#  - get messages used in patterns (and make methods use messages in their regexp's so lang-independent)
+
 
 
use HTTP::Request;
 
use HTTP::Request;
 
use LWP::UserAgent;
 
use LWP::UserAgent;
 
use POSIX qw(strftime);
 
use POSIX qw(strftime);
+
 
 
sub wikiLogin;
 
sub wikiLogin;
 
sub wikiLogout;
 
sub wikiLogout;
Line 42: Line 42:
 
sub wikiExamineBraces;
 
sub wikiExamineBraces;
 
sub wikiGuid;
 
sub wikiGuid;
+
 
 
# Set up a global client for making HTTP requests as a browser
 
# Set up a global client for making HTTP requests as a browser
 
$::client = LWP::UserAgent->new(
 
$::client = LWP::UserAgent->new(
Line 48: Line 48:
 
agent      => 'Mozilla/5.0 (Windows; U; Windows NT 5.1; it; rv:1.8.1.14)',
 
agent      => 'Mozilla/5.0 (Windows; U; Windows NT 5.1; it; rv:1.8.1.14)',
 
from      => 'wiki.pl@organicdesign.co.nz',
 
from      => 'wiki.pl@organicdesign.co.nz',
timeout    => 20,
+
timeout    => 10,
 
max_size  => 100000
 
max_size  => 100000
 
);
 
);
+
 
 
sub logAdd {
 
sub logAdd {
 
my $entry = shift;
 
my $entry = shift;
Line 60: Line 60:
 
return $entry;
 
return $entry;
 
}
 
}
+
 
sub logHash {
 
sub logHash {
 
my $href = shift;
 
my $href = shift;
Line 67: Line 67:
 
}
 
}
 
}
 
}
+
 
 
# Login to a MediaWiki
 
# Login to a MediaWiki
 
# todo: check if logged in first
 
# todo: check if logged in first
Line 78: Line 78:
 
my $html = '';
 
my $html = '';
 
if ($::client->get($url)->is_success) {
 
if ($::client->get($url)->is_success) {
my %form = (wpName => $user, wpPassword => $pass, wpLoginattempt => 'Log in', wpDomain => $domain, wpRemember => '1');
+
my %form = (wpName => $user, wpPassword => $pass, wpDomain => $domain, wpLoginattempt => 'Log in', wpRemember => '1');
 
my $response = $::client->post("$url&action=submitlogin&type=login", \%form);
 
my $response = $::client->post("$url&action=submitlogin&type=login", \%form);
 
$html = $response->content;
 
$html = $response->content;
Line 88: Line 88:
 
}
 
}
 
else {
 
else {
if ($html =~ /<div class="errorbox">\s*(<h2>.+?<\/h2>\s*)?(.+?)\s*<\/div>/is) { logAdd "ERROR: $2: $html" }
+
if ($html =~ /<div class="errorbox">\s*(<h2>.+?<\/h2>\s*)?(.+?)\s*<\/div>/is) { logAdd "ERROR: $2" }
 
else { logAdd "ERROR: couldn't log $user in to $wiki!" }
 
else { logAdd "ERROR: couldn't log $user in to $wiki!" }
 
}
 
}
Line 94: Line 94:
 
return $success;
 
return $success;
 
}
 
}
+
 
 
# Logout of a MediaWiki
 
# Logout of a MediaWiki
 
sub wikiLogout {
 
sub wikiLogout {
Line 104: Line 104:
 
return $success;
 
return $success;
 
}
 
}
+
 
 
# Edit a MediaWiki page
 
# Edit a MediaWiki page
 
# todo: don't return success if edited succeeded but made no changes
 
# todo: don't return success if edited succeeded but made no changes
Line 113: Line 113:
 
my $err = 'ERROR';
 
my $err = 'ERROR';
 
my $retries = 1;
 
my $retries = 1;
+
 
while ($retries--) {
 
while ($retries--) {
 
    my @matches;
 
    my @matches;
Line 121: Line 121:
 
$response->content =~ m|<input type='hidden' value="(.+?)" name="wpEditToken" />|g
 
$response->content =~ m|<input type='hidden' value="(.+?)" name="wpEditToken" />|g
 
)) {
 
)) {
+
 
 
# Got token etc, now submit an edit-form
 
# Got token etc, now submit an edit-form
 
my %form = (
 
my %form = (
Line 130: Line 130:
 
);
 
);
 
$form{wpMinoredit} = 1 if $minor;
 
$form{wpMinoredit} = 1 if $minor;
+
 
my $tokens = @{[$response->content =~ m|(<input type='hidden'.+type="hidden" value=".*?" />)|gs]};
 
my $tokens = @{[$response->content =~ m|(<input type='hidden'.+type="hidden" value=".*?" />)|gs]};
+
 
 
# Grabbing fields separately as hidden input order may vary in global regex
 
# Grabbing fields separately as hidden input order may vary in global regex
 
$response->content =~ m|<input type='hidden' value="(.*?)" name="wpSection" />|    && ($form{wpSection} = $1);
 
$response->content =~ m|<input type='hidden' value="(.*?)" name="wpSection" />|    && ($form{wpSection} = $1);
Line 139: Line 139:
 
$response->content =~ m|<input name="wpAutoSummary" type="hidden" value="(.*?)" />| && ($form{wpAutoSummary} = $1);
 
$response->content =~ m|<input name="wpAutoSummary" type="hidden" value="(.*?)" />| && ($form{wpAutoSummary} = $1);
 
$response = $::client->post("$wiki?title=$title&action=submit", \%form);
 
$response = $::client->post("$wiki?title=$title&action=submit", \%form);
+
 
if ($response->content =~ /Someone else has changed this page/) {
+
if ($response->content =~ /<!-- start content -->Someone else has changed this page/) {
 
$err = 'EDIT CONFLICT';
 
$err = 'EDIT CONFLICT';
 
$retries = 0;
 
$retries = 0;
Line 150: Line 150:
 
return $success;
 
return $success;
 
}
 
}
+
 
 
# Append a wiki page
 
# Append a wiki page
 
sub wikiAppend {
 
sub wikiAppend {
Line 158: Line 158:
 
return wikiEdit($wiki,$title,$content.$append,$comment);
 
return wikiEdit($wiki,$title,$content.$append,$comment);
 
}
 
}
+
 
 
# Get the date of last edit of an article
 
# Get the date of last edit of an article
 
sub wikiLastEdit {
 
sub wikiLastEdit {
Line 166: Line 166:
 
return $1 if $response->is_success and $response->content =~ /<a.+?>(\d+:\d+.+?\d)<\/a>/;
 
return $1 if $response->is_success and $response->content =~ /<a.+?>(\d+:\d+.+?\d)<\/a>/;
 
}
 
}
+
 
 
# Retrieve the raw content of a page
 
# Retrieve the raw content of a page
 
sub wikiRawPage {
 
sub wikiRawPage {
Line 173: Line 173:
 
return $response->content if $response->is_success;
 
return $response->content if $response->is_success;
 
}
 
}
+
 
 
# Return a hash of sections, each containing text, lists, links and templates
 
# Return a hash of sections, each containing text, lists, links and templates
 
# - if only one parameter supplied, then it's assumed to be the wikitext content to extract structure from
 
# - if only one parameter supplied, then it's assumed to be the wikitext content to extract structure from
Line 183: Line 183:
 
/(.+?)\s*=+\s*(.+?)\s*/s;
 
/(.+?)\s*=+\s*(.+?)\s*/s;
 
my($heading,$content) = ($1, $2);
 
my($heading,$content) = ($1, $2);
+
 
# todo: extract lists, links, templates from content
 
# todo: extract lists, links, templates from content
+
 
# if heading, add a node and put content, lists, links in it, else put under root
 
# if heading, add a node and put content, lists, links in it, else put under root
 
if ($1) {
 
if ($1) {
Line 193: Line 193:
 
return %page;
 
return %page;
 
}
 
}
+
 
 
# Returns mediawiki version string
 
# Returns mediawiki version string
 
sub wikiGetVersion {
 
sub wikiGetVersion {
Line 200: Line 200:
 
return $1 if $response->content =~ /MediaWiki.+?: ([0-9.]+[0x20-0x7e]+)/;
 
return $1 if $response->content =~ /MediaWiki.+?: ([0-9.]+[0x20-0x7e]+)/;
 
}
 
}
+
 
 
# Return a hash (number => name) of the wiki's namespaces
 
# Return a hash (number => name) of the wiki's namespaces
 
sub wikiGetNamespaces {
 
sub wikiGetNamespaces {
Line 208: Line 208:
 
return ($1 =~ /<option.*?value="([0-9]+)".*?>(.+?)<\/option>/gs, 0 => '');
 
return ($1 =~ /<option.*?value="([0-9]+)".*?>(.+?)<\/option>/gs, 0 => '');
 
}
 
}
+
 
 
# Returns hash (anchor => href) list elements in article content
 
# Returns hash (anchor => href) list elements in article content
 
sub wikiGetList {
 
sub wikiGetList {
 
my( $wiki, $title ) = @_;
 
my( $wiki, $title ) = @_;
 
my $response = $::client->get("$wiki?title=$title");
 
my $response = $::client->get("$wiki?title=$title");
$response->content =~ /(.+)/s;
+
$response->content =~ /<!-- start content -->(.+)<!-- end content -->/s;
 
my $html = $1;
 
my $html = $1;
 
my %list = $html =~ /<li>.*?<a.*?href="(.+?)".*?>(.+?)<\/a>\s*<\/li>/gs;
 
my %list = $html =~ /<li>.*?<a.*?href="(.+?)".*?>(.+?)<\/a>\s*<\/li>/gs;
Line 220: Line 220:
 
return %tmp;
 
return %tmp;
 
}
 
}
+
 
 
# Todo error checking on the type of failure, e.g. no user rights to delete
 
# Todo error checking on the type of failure, e.g. no user rights to delete
 
# Capture error if article already deleted
 
# Capture error if article already deleted
Line 249: Line 249:
 
return $success;
 
return $success;
 
}
 
}
+
 
 
# Todo logAdd the revision/all revisions
 
# Todo logAdd the revision/all revisions
 
sub wikiRestore {
 
sub wikiRestore {
Line 266: Line 266:
 
my %form = (wpComment => $reason, target => $title, wpEditToken => $1, restore=>"Restore");
 
my %form = (wpComment => $reason, target => $title, wpEditToken => $1, restore=>"Restore");
 
my @timestamps = $response->content =~ m/<input .*?"(ts\d*?)".*?/g;
 
my @timestamps = $response->content =~ m/<input .*?"(ts\d*?)".*?/g;
+
 
# Restore specified $revision  
 
# Restore specified $revision  
 
if ($revision) {  
 
if ($revision) {  
Line 288: Line 288:
 
return $success;
 
return $success;
 
}
 
}
+
 
+
 
 
# Upload a files into a wiki using its Special:Upload page
 
# Upload a files into a wiki using its Special:Upload page
 
sub wikiUploadFile {
 
sub wikiUploadFile {
Line 309: Line 309:
 
my $response = $::client->post($url, \%form, Content_Type => 'form-data');
 
my $response = $::client->post($url, \%form, Content_Type => 'form-data');
 
$success = $response->is_success;
 
$success = $response->is_success;
+
 
 
# Check if file is already uploaded
 
# Check if file is already uploaded
 
if ($success && $response->content =~ m/Upload warning.+?(A file with this name exists already|File name has been changed to)/s) {
 
if ($success && $response->content =~ m/Upload warning.+?(A file with this name exists already|File name has been changed to)/s) {
Line 325: Line 325:
 
     return $success;
 
     return $success;
 
}
 
}
+
 
+
 
 
# Delete an uploaded file from a wiki
 
# Delete an uploaded file from a wiki
 
sub wikiDeleteFile {
 
sub wikiDeleteFile {
Line 356: Line 356:
 
logAdd("Deleted Image:$imagename");
 
logAdd("Deleted Image:$imagename");
 
$response = $::client->post("$url", \%form);
 
$response = $::client->post("$url", \%form);
+
 
 
my $html = $response->content;
 
my $html = $response->content;
 
$success = $response->is_success && $html =~ /Action complete/;
 
$success = $response->is_success && $html =~ /Action complete/;
Line 363: Line 363:
 
return $success;
 
return $success;
 
}
 
}
+
 
+
 
 
# Download an uploaded file by name from a wiki to a local file
 
# Download an uploaded file by name from a wiki to a local file
 
# - if no namespace is supplied with the source then "Image" is used
 
# - if no namespace is supplied with the source then "Image" is used
Line 384: Line 384:
 
}
 
}
 
}
 
}
+
 
+
 
 
# Download all uploaded files from a wiki to a local directory
 
# Download all uploaded files from a wiki to a local directory
 
# - to a maximum of 500 images
 
# - to a maximum of 500 images
Line 394: Line 394:
 
my $list  = $::client->get("$wiki?title=Special:Imagelist&limit=500")->content;
 
my $list  = $::client->get("$wiki?title=Special:Imagelist&limit=500")->content;
 
my @files = $list =~ /href\s*=\s*['"](\/[^"']+?\/.\/..\/[^'"]+?)["']/g;
 
my @files = $list =~ /href\s*=\s*['"](\/[^"']+?\/.\/..\/[^'"]+?)["']/g;
+
 
 
mkdir $dir;
 
mkdir $dir;
 
for my $url (@files) {
 
for my $url (@files) {
Line 406: Line 406:
 
}
 
}
 
}
 
}
+
 
+
 
 
# Change protection state of an article
 
# Change protection state of an article
 
# - relevant from 1.8+. From 1.12+ so may as well use API
 
# - relevant from 1.8+. From 1.12+ so may as well use API
Line 422: Line 422:
 
$cascade,      # optional boolean for cascading restrictions over transcluded articles
 
$cascade,      # optional boolean for cascading restrictions over transcluded articles
 
) = @_;
 
) = @_;
+
 
 
if (not $restrictions) { $restrictions = { "edit" => "", "move" => "" } }
 
if (not $restrictions) { $restrictions = { "edit" => "", "move" => "" } }
+
 
 
# A list of defaults which could be used in usage logAdd reporting  
 
# A list of defaults which could be used in usage logAdd reporting  
 
# my $defaults = {
 
# my $defaults = {
Line 431: Line 431:
 
# "Sysops only"              => "sysop"
 
# "Sysops only"              => "sysop"
 
# };
 
# };
+
 
my $url = "$wiki?title=$title&action=protect";
 
my $url = "$wiki?title=$title&action=protect";
 
     my $success = 0;
 
     my $success = 0;
Line 447: Line 447:
 
# Same problem, post on line 392 doesn't return content
 
# Same problem, post on line 392 doesn't return content
 
$success = $response->is_success && $response->content =~ m/<input.+?name="wpEditToken".+?value="(.*?)"/s;
 
$success = $response->is_success && $response->content =~ m/<input.+?name="wpEditToken".+?value="(.*?)"/s;
+
 
 
%form = (
 
%form = (
 
"wpEditToken"      => $1,
 
"wpEditToken"      => $1,
Line 453: Line 453:
 
"mwProtect-reason"  => $comment  || "",
 
"mwProtect-reason"  => $comment  || "",
 
);
 
);
+
 
 
$form{"mwProtect-level-$_"} = $restrictions->{$_} for keys %{$restrictions};
 
$form{"mwProtect-level-$_"} = $restrictions->{$_} for keys %{$restrictions};
 
# Allowing for cascade option
 
# Allowing for cascade option
Line 466: Line 466:
 
   
 
   
 
# Replace parameters in a template call using examineBraces
 
# Replace parameters in a template call using examineBraces
# (done) - allow for no param hash which would result in [[:Template:Template]]
+
# (done) - allow for no param hash which would result in {{template}}
# - account for both templates or parser-functions, i.e.  
+
# - account for both templates or parser-functions, i.e. {{foo|args..}} or {{#foo:args...}}
 
#UNIQ7fc616d244d9c035-item-1--QIN or {{#foo:args...}}
 
 
# - allow for multiple templates of same name by matching first param, then second etc
 
# - allow for multiple templates of same name by matching first param, then second etc
 
#
 
#
Line 488: Line 486:
 
$minor
 
$minor
 
) = @_;
 
) = @_;
+
 
 
  $success = 0;
 
  $success = 0;
+
 
   $template || ($template = "template");
 
   $template || ($template = "template");
 
my $wtext = wikiRawPage($wiki, $title);
 
my $wtext = wikiRawPage($wiki, $title);
+
 
 
# Use examine braces to get all content
 
# Use examine braces to get all content
 
my @articleBraces = examineBraces($wtext);
 
my @articleBraces = examineBraces($wtext);
+
 
# Array of matches
 
# Array of matches
 
my @matches  = ();
 
my @matches  = ();
Line 508: Line 506:
 
}
 
}
 
}
 
}
+
 
if( scalar(@matches) < 1){return($success)}          # no braces of matching name
 
if( scalar(@matches) < 1){return($success)}          # no braces of matching name
 
elsif (scalar(@matches) == 1){
 
elsif (scalar(@matches) == 1){
Line 522: Line 520:
 
my $ambvalue = $ambig->{$ambkey};
 
my $ambvalue = $ambig->{$ambkey};
 
foreach(@matches) {
 
foreach(@matches) {
+
 
 
$templateParams = substr($wtext, $_->{'OFFSET'}, $_->{'LENGTH'});
 
$templateParams = substr($wtext, $_->{'OFFSET'}, $_->{'LENGTH'});
 
if($templateParams =~ m/$ambkey\s*=\s*$ambvalue/g) {
 
if($templateParams =~ m/$ambkey\s*=\s*$ambvalue/g) {
Line 528: Line 526:
 
}
 
}
 
}
 
}
+
 
if (scalar @brace > 1){ # None found
 
if (scalar @brace > 1){ # None found
 
logAdd("Aborting ambiguous parameter match found");
 
logAdd("Aborting ambiguous parameter match found");
Line 552: Line 550:
 
return $success;
 
return $success;
 
}
 
}
+
 
 
# Return information on brace-structure in passed wikitext
 
# Return information on brace-structure in passed wikitext
 
sub examineBraces {
 
sub examineBraces {
Line 572: Line 570:
 
         return @braces;
 
         return @braces;
 
}
 
}
+
 
 
# Using Special:Movepage/article
 
# Using Special:Movepage/article
 
# wpNewTitle
 
# wpNewTitle
Line 586: Line 584:
 
while ($retries--) {
 
while ($retries--) {
 
my $response = $::client->get($url);
 
my $response = $::client->get($url);
+
 
 
# Todo: Need to catch output where user does not have move privileges
 
# Todo: Need to catch output where user does not have move privileges
+
 
 
# Permissions Errors
 
# Permissions Errors
 
#You must be a registered user and logged in to move a page
 
#You must be a registered user and logged in to move a page
+
 
 
# Special:Movepage seems to move any non-existent page then throw the message after posting;
 
# Special:Movepage seems to move any non-existent page then throw the message after posting;
 
# This action cannot be performed on this page
 
# This action cannot be performed on this page
 
# <input type="hidden" value="095485e50db577baa80c407d0e032e43+\" name="wpEditToken"/>
 
# <input type="hidden" value="095485e50db577baa80c407d0e032e43+\" name="wpEditToken"/>
 
#### Interesting 'value' and 'name' can be reversed, and single or double quoted
 
#### Interesting 'value' and 'name' can be reversed, and single or double quoted
+
 
 
if ($response->is_success && $response->content =~ m/<h1 class="firstHeading">Permissions Errors<\/h1>/) {
 
if ($response->is_success && $response->content =~ m/<h1 class="firstHeading">Permissions Errors<\/h1>/) {
 
logAdd("User $user does not have permissions to move $oldname");
 
logAdd("User $user does not have permissions to move $oldname");
 
return 0;
 
return 0;
 
}
 
}
+
 
 
if ($response->is_success && $response->content =~ m/<h1 class="firstHeading">Move page<\/h1>/) {
 
if ($response->is_success && $response->content =~ m/<h1 class="firstHeading">Move page<\/h1>/) {
 
$success = $response->is_success;
 
$success = $response->is_success;
Line 617: Line 615:
 
return $success;
 
return $success;
 
}
 
}
+
 
 
# See http://www.organicdesign.co.nz/MediaWiki_code_snippets
 
# See http://www.organicdesign.co.nz/MediaWiki_code_snippets
 
sub wikiExamineBraces {
 
sub wikiExamineBraces {
Line 637: Line 635:
 
         return @braces;
 
         return @braces;
 
}
 
}
+
 
 
# Create a GUID article title compatible with the RecordAdmin extension
 
# Create a GUID article title compatible with the RecordAdmin extension
 
sub wikiGuid {
 
sub wikiGuid {

Revision as of 06:30, 28 May 2009

#!/usr/bin/perl
# - based on wiki.pl{{perl}}{{Category:Robots}}
# - Licenced under LGPL (http://www.gnu.org/copyleft/lesser.html)
# - Authors: [http://www.organicdesign.co.nz/Nad Nad], [http://www.organicdesign.co.nz/Sven Sven]
# - Source: http://www.organicdesign.co.nz/wiki.pl
# - Started: 2008-03-16
# - Updated: 2009-02-27
# - Tested versions: 1.6.10, 1.8.4, 1.9.3, 1.10.2, 1.11.0, 1.12.rc1, 1.13.2, 1.14.0
# http://www.mediawiki.org/wiki/Manual:Parameters_to_index.php#What_to_do
# - Provides details on required name fields in forms
# NOTES REGARDING CHANGING TO PERL PACKAGE AND ADDING API SUPPORT
# - constructor:
#  - login
#  - get wiki version and whether to use HTML or API
#  - get namespaces
#  - get messages used in patterns (and make methods use messages in their regexps so they are language-independent)

use HTTP::Request; use LWP::UserAgent; use POSIX qw(strftime);

# Forward declarations for the wiki helper subroutines defined further down
sub wikiLogin;
sub wikiLogout;
sub wikiEdit;
sub wikiAppend;
sub wikiLastEdit;
sub wikiRawPage;
sub wikiStructuredPage;
sub wikiGetVersion;
sub wikiGetNamespaces;
sub wikiGetList;
sub wikiDelete;
sub wikiRestore;
sub wikiUploadFile;
sub wikiDeleteFile;
sub wikiDownloadFile;
sub wikiDownloadFiles;
sub wikiProtect;
sub wikiUpdateTemplate;
sub wikiMove;
sub wikiExamineBraces;
sub wikiGuid;

# Set up a global client for making HTTP requests as a browser

# Shared HTTP client used by every wiki* subroutine. It keeps cookies in an
# in-memory jar (so a login persists across requests) and sends a desktop
# browser User-Agent string.
$::client = LWP::UserAgent->new(
    cookie_jar => {},
    agent      => 'Mozilla/5.0 (Windows; U; Windows NT 5.1; it; rv:1.8.1.14)',
    from       => 'wiki.pl@organicdesign.co.nz',
    timeout    => 10,
    max_size   => 100000,    # NOTE(review): LWP caps response bodies at this many bytes - affects large downloads
);

# Record a log entry.
# - if $::log is set, the entry is appended to that file prefixed with the
#   local time, otherwise it is printed to STDERR without a timestamp
# - dies if the log file cannot be opened for appending
# - returns the entry unchanged so calls can be chained
sub logAdd {
    my $entry = shift;
    if ($::log) {
        open my $log_fh, '>>', $::log
            or die "Can't open $::log for writing! ($!)";
        # The timestamp used to be glued directly onto the entry text;
        # a ": " separator keeps the two apart
        print {$log_fh} scalar(localtime) . ": $entry\n";
        close $log_fh;
    }
    else {
        print STDERR "$entry\n";
    }
    return $entry;
}

# Dump a hash to STDERR, one "key => value" line per entry.
# - $href is a hash reference
# - keys are printed in sorted order so the output is reproducible
# - uses lexical loop variables (the old version overwrote the package
#   globals $key and $value) and does not depend on the hash's each() state
sub logHash {
    my $href = shift;
    for my $key (sort keys %$href) {
        print STDERR "$key => $href->{$key}\n";
    }
    return;
}

# Login to a MediaWiki
# todo: check if logged in first

# Log $user in to the wiki at $wiki (the URL of its index.php).
# - $domain is sent as the wpDomain form field
# - returns true when the login POST is answered with a redirect, or with a
#   page containing "You are now logged in"; false otherwise
# - the outcome is reported through logAdd
sub wikiLogin {
    my ($wiki, $user, $pass, $domain) = @_;
    my $url     = "$wiki?title=Special:Userlogin";
    my $success = 0;
    my $retries = 1;
    while ($retries--) {
        my $html = '';

        # The login page is requested first; only if that works is the form posted
        if ($::client->get($url)->is_success) {
            my %form = (
                wpName         => $user,
                wpPassword     => $pass,
                wpDomain       => $domain,
                wpLoginattempt => 'Log in',
                wpRemember     => '1',
            );
            my $response = $::client->post("$url&action=submitlogin&type=login", \%form);
            $html    = $response->content;
            $success = $response->is_redirect
                || ($response->is_success && $html =~ /You are now logged in/);
        }

        if ($success) {
            logAdd("$user successfully logged in to $wiki.");
            $retries = 0;
        }
        # NOTE(review): the opening <div ...> and <h2> tags of this pattern were
        # lost from the published source; class="errorbox" is an assumption - confirm
        elsif ($html =~ /<div class="errorbox">\s*(<h2>.+?<\/h2>\s*)?(.+?)\s*<\/div>/is) {
            logAdd("ERROR: $2");
        }
        else {
            logAdd("ERROR: couldn't log $user in to $wiki!");
        }
    }
    return $success;
}

# Logout of a MediaWiki

# End the current session on the wiki at $wiki by requesting
# Special:Userlogout. Logs the outcome and returns whether the request
# succeeded.
sub wikiLogout {
    my ($wiki) = @_;
    my $response   = $::client->get("$wiki?title=Special:Userlogout");
    my $logged_out = $response->is_success;
    if ($logged_out) {
        logAdd("Successfully logged out of $wiki.");
    }
    else {
        logAdd("WARNING: couldn't log out of $wiki!");
    }
    return $logged_out;
}

# Edit a MediaWiki page
# todo: don't return success if edit succeeded but made no changes

# Replace the text of page $title on $wiki with $content.
# - $comment is used as the edit summary, $minor (if true) flags a minor edit
# - fetches the edit form to obtain wpEditToken and the other hidden fields,
#   then posts the form with action=submit
# - returns true unless the POST response is an error; an edit conflict
#   ("Someone else has changed this page") leaves the result false
sub wikiEdit {
    my ($wiki, $title, $content, $comment, $minor) = @_;
    logAdd("Attempting to edit \"$title\" on $wiki");
    my $success = 0;
    my $err     = 'ERROR';
    my $retries = 1;

    while ($retries--) {

        # Request the page for editing and extract the edit-token
        my $response = $::client->get("$wiki?title=$title&action=edit");
        my $page     = $response->content;
        if ($response->is_success
            and $page =~ m|<input type='hidden' value="(.+?)" name="wpEditToken" />|)
        {

            # Got token etc, now submit an edit-form
            my %form = (
                wpEditToken => $1,
                wpTextbox1  => $content,
                wpSummary   => $comment,
                wpSave      => 'Save page',
            );
            $form{wpMinoredit} = 1 if $minor;

            # Grabbing fields separately as hidden input order may vary
            my %pattern_for = (
                wpSection     => qr|<input type='hidden' value="(.*?)" name="wpSection" />|,
                wpStarttime   => qr|<input type='hidden' value="(.*?)" name="wpStarttime" />|,
                wpEdittime    => qr|<input type='hidden' value="(.*?)" name="wpEdittime" />|,
                wpAutoSummary => qr|<input name="wpAutoSummary" type="hidden" value="(.*?)" />|,
            );
            for my $field (keys %pattern_for) {
                $form{$field} = $1 if $page =~ $pattern_for{$field};
            }

            $response = $::client->post("$wiki?title=$title&action=submit", \%form);
            if ($response->content =~ /Someone else has changed this page/) {
                $err     = 'EDIT CONFLICT';
                $retries = 0;
            }
            else {
                $success = !$response->is_error;
            }
        }
        else {
            $err = $response->is_success ? 'MATCH FAILED' : 'RQST FAILED';
        }

        if ($success) {
            $retries = 0;
            logAdd("\"$title\" updated.");
        }
        else {
            logAdd("$err: Couldn't edit \"$title\" on $wiki!\n");
        }
    }
    return $success;
}

# Append text to a wiki page

# Append $append to the current text of page $title and save it with
# summary $comment. A page that could not be fetched, or whose raw text is
# the "(There is currently no text in this page)" placeholder, is treated
# as empty. Returns the result of wikiEdit.
sub wikiAppend {
    my ($wiki, $title, $append, $comment) = @_;
    my $content = wikiRawPage($wiki, $title);
    if (!defined $content
        or $content eq '(There is currently no text in this page)')
    {
        $content = '';
    }
    return wikiEdit($wiki, $title, $content . $append, $comment);
}

# Get the date of last edit of an article

# Return the date text of the most recent history entry of page $title,
# as it appears in the link on the history page (limit=1).
# Returns nothing (undef / empty list) when the request fails or no date
# link is found.
sub wikiLastEdit {
    my ($wiki, $title) = @_;

    # Request the last history entry and extract date
    my $request  = HTTP::Request->new(GET => "$wiki?title=$title&action=history&limit=1");
    my $response = $::client->request($request);
    if ($response->is_success
        and $response->content =~ /<a.+?>(\d+:\d+.+?\d)<\/a>/)
    {
        return $1;
    }
    return;
}

# Retrieve the raw content of a page

# Fetch the raw wikitext of page $title (action=raw).
# - if $expand is true, "&templates=expand" is added to the request
# - returns the response body on success, nothing (undef / empty list) on
#   failure
sub wikiRawPage {
    my ($wiki, $title, $expand) = @_;
    my $url = "$wiki?title=$title&action=raw" . ($expand ? '&templates=expand' : '');
    my $response = $::client->get($url);
    return $response->content if $response->is_success;
    return;
}

# Return a hash of sections, each containing text, lists, links and templates
# - if only one parameter supplied, then it's assumed to be the wikitext content to extract structure from

# Split wikitext into sections by heading (work in progress).
# - called as wikiStructuredPage($wiki, $title) the expanded raw text of
#   the page is fetched; called with one argument, that argument is the
#   wikitext itself
# - currently only prints each heading followed by "----" and returns an
#   empty hash; filling the hash is still a todo
sub wikiStructuredPage {
    my ($wiki, $title) = @_;
    my $page = $title ? wikiRawPage($wiki, $title, 1) : $wiki;
    my %page = ();
    return %page unless defined $page;

    for my $section (split /^=+\s*/m, $page) {

        # Skip chunks with no heading instead of reusing stale captures
        next unless $section =~ /\A(.+?)\s*=+\s*(.*?)\s*\z/s;
        my ($heading, $content) = ($1, $2);

        # todo: extract lists, links, templates from content

        # if heading, add a node and put content, lists, links in it, else put under root
        if ($heading) {
            print "$heading\n----\n";
        }
    }
    return %page;
}

# Returns the MediaWiki version string

# Return the MediaWiki version reported on Special:Version, e.g. "1.14.0".
# - the version is the dotted number that follows "MediaWiki...: ", plus any
#   directly attached letters/digits (such as "rc1")
# - returns nothing (undef / empty list) when no version is found
# The previous character class [0x20-0x7e] was not a hex range: it only
# allowed the characters 0, x, 2, 7 and e, so versions such as 1.13.3 or
# 1.8.4 were never matched.
sub wikiGetVersion {
    my $wiki     = shift;
    my $response = $::client->get("$wiki?title=Special:Version&action=render");
    if ($response->content =~ /MediaWiki.+?: ([0-9]+(?:\.[0-9]+)*[A-Za-z0-9_-]*)/) {
        return $1;
    }
    return;
}

# Return a hash (number => name) of the wiki's namespaces

# Return a hash (number => name) of the wiki's namespaces, read from the
# namespace <select> on Special:Allpages.
# - namespace 0 is always mapped to the empty string
# - if the select box is not found, only (0 => '') is returned
sub wikiGetNamespaces {
    my $wiki     = shift;
    my $response = $::client->get("$wiki?title=Special:Allpages");
    my ($options) =
        $response->content =~ /<select id="namespace".+?>\s*(.+?)\s*<\/select>/s;
    return (0 => '') unless defined $options;

    # The trailing 0 => '' overrides whatever label the wiki shows for the
    # main namespace once the list is assigned to a hash
    return ($options =~ /<option.*?value="([0-9]+)".*?>(.+?)<\/option>/gs, 0 => '');
}

# Returns hash (anchor => href) of list elements in article content

# Return a hash (anchor text => href) for every list item of the rendered
# page $title that consists of a link.
# - when two items share the same anchor text, the later one wins
# - returns an empty hash when the page has no content
sub wikiGetList {
    my ($wiki, $title) = @_;
    my $response = $::client->get("$wiki?title=$title");

    # NOTE(review): this pattern matches the whole response; the published
    # source may have lost content delimiters around (.+) - confirm
    my ($html) = $response->content =~ /(.+)/s;
    my %list = ();
    return %list unless defined $html;

    # The <li> at the start of this pattern had been rendered away as a bullet
    while ($html =~ /<li>.*?<a.*?href="(.+?)".*?>(.+?)<\/a>\s*<\/li>/gs) {
        $list{$2} = $1;
    }
    return %list;
}
    # Delete page $title from the wiki.
    # - $reason defaults to "content was: '<title>'"
    # - returns true when the deletion form was posted successfully and the
    #   answer contains "Action complete"
    # - NOTE(review): $user in the log message is not a parameter; presumably
    #   a package variable set by the calling script - confirm
    # Todo error checking on the type of failure, e.g. no user rights to delete
    # Capture error if article already deleted
    sub wikiDelete {
        my ($wiki, $title, $reason) = @_;
        my $url = "$wiki?title=$title&action=delete";
        $reason = "content was: '$title'" unless $reason;
        my $success = 0;
        my $retries = 1;
        while ($retries--) {
            my $html     = '';
            my $response = $::client->get($url);
            if ($response->is_success
                && $response->content =~ m/<input name="wpEditToken".*? value="(.*?)".*?<\/form>/s)
            {
                my %form = (wpEditToken => $1, wpReason => $reason);
                $response = $::client->post($url, \%form);
                $html     = $response->content;
                $success  = $response->is_success && $html =~ /Action complete/;
            }
            if ($success) {
                logAdd("$user successfully deleted $title.");
                $retries = 0;
            }

            # Parser response to determine if user has sysop privileges
        }
        return $success;
    }
    # Restore (undelete) page $title through Special:Undelete.
    # - $reason defaults to "Restoring: '<title>'"
    # - $revision (1-based position in the list of deleted revisions shown on
    #   the form) restores just that revision; without it every listed
    #   timestamp field is submitted
    # - a $revision beyond the end of the list is clamped to the last entry
    #   (it used to select the one before the last)
    # - returns true when the answer contains "has been restored"
    # Todo logAdd the revision/all revisions
    sub wikiRestore {
        my ($wiki, $title, $reason, $revision) = @_;
        my $url = "$wiki?title=Special:Undelete";
        $reason = "Restoring: '$title'" unless $reason;
        my $success = 0;
        my $retries = 1;
        while ($retries--) {
            my $html     = '';
            my $response = $::client->get("$url&target=$title");
            if ($response->is_success
                && $response->content =~ m/<input name="wpEditToken".*? value="(.*?)".*?<\/form>/s)
            {
                my %form = (
                    wpComment   => $reason,
                    target      => $title,
                    wpEditToken => $1,
                    restore     => "Restore",
                );
                my @timestamps = $response->content =~ m/<input .*?"(ts\d*?)".*?/g;

                # Restore specified $revision
                if ($revision) {
                    if ($revision > @timestamps) {
                        $revision = @timestamps;
                        logAdd("Warning: \$revision specifed does not exist");
                    }
                    $form{ $timestamps[ $revision - 1 ] } = 1 if @timestamps;
                }
                else {
                    @form{@timestamps} = (undef) x @timestamps;
                }
                $response = $::client->post("$url&action=submit", \%form);
                $html     = $response->content;
                $success  = $response->is_success && $html =~ /has been restored/;
            }
            if ($success) {
                logAdd("$user successfully restored $title.");
                $retries = 0;
            }

            # Parser response to determine if user has sysop privileges
        }
        return $success;
    }
    # Upload a file into a wiki using its Special:Upload page.
    # - $sourcefile is the local path, $destname the name to store it under,
    #   $summary the upload description
    # - if the wiki answers with an "Upload warning" because the name already
    #   exists (or was changed), the form is posted again with the warning
    #   acknowledged so a new version is saved
    # - returns whether the last upload request succeeded
    sub wikiUploadFile {
        my ($wiki, $sourcefile, $destname, $summary) = @_;
        my $url     = "$wiki?title=Special:Upload&action=submit";
        my $success = 0;
        my $retries = 1;
        while ($retries--) {
            my %form = (
                wpSourceType         => 'file',
                wpDestFile           => $destname,
                wpUploadDescription  => $summary,
                wpUpload             => "Upload file",
                wpDestFileWarningAck => '',
                wpUploadFile         => [ $sourcefile => $destname ],
                wpWatchthis          => '0',
            );
            my $response = $::client->post($url, \%form, Content_Type => 'form-data');
            $success = $response->is_success;

            # Check if file is already uploaded
            if ($success
                && $response->content =~ m/Upload warning.+?(A file with this name exists already|File name has been changed to)/s)
            {
                # Need to grab the wpSessionKey input field
                my ($session_key) =
                    $response->content =~ m/<input type='hidden' name='wpSessionKey' value="(.+?)" \/>/;
                $form{'wpSessionKey'}         = $session_key;
                $form{'wpIgnoreWarning'}      = 'true';
                $form{'wpDestFileWarningAck'} = 1;
                $form{'wpLicense'}            = '';
                $form{'wpUpload'}             = "Save file";
                $response = $::client->post("$url&action=submit", \%form, Content_Type => 'form-data');
                $success  = $response->is_success;
                logAdd("Uploaded a new version of $destname") if $success;
            }
            elsif ($success) {
                logAdd("Uploaded $destname");
            }
            logAdd("ERROR: couldn't upload $destname") unless $success;
        }
        return $success;
    }
    # Delete an uploaded file from a wiki.
    # - $imagename is the file name without the "Image:" prefix, $comment the
    #   deletion reason
    # - returns true when the deletion form was posted and the answer
    #   contains "Action complete"; a permission error or an already deleted
    #   file is logged and yields false
    sub wikiDeleteFile {
        my ($wiki, $imagename, $comment) = @_;
        my $url     = "$wiki?title=Image:$imagename&action=delete";
        my $success = 0;
        my $retries = 1;
        while ($retries--) {
            my $response = $::client->get($url);
            my $page     = $response->content;
            if ($response->is_success
                && $page =~ m/Permission error.+?The action you have requested is limited to users in the group/s)
            {
                logAdd("Error: $user does not have the permissions to delete $imagename");
                return $success;
            }
            if ($response->is_success
                && $page =~ m/Internal error.+?Could not delete the page or file specified/s)
            {
                logAdd("Error: Could not delete $imagename - already deleted?");
                return $success;
            }

            # \Q...\E: the file name is literal text, not a pattern
            if ($response->is_success
                && $page =~ m/Delete \Q$imagename\E.+?<input.+?name="wpEditToken".+?value="(.*?)".+?Reason for deletion:/is)
            {
                my %form = (
                    wpEditToken            => $1,
                    wpDeleteReasonList     => "other",
                    wpReason               => $comment || "",
                    'mw-filedelete-submit' => "Delete",
                );
                $response = $::client->post($url, \%form);
                my $html = $response->content;
                $success = $response->is_success && $html =~ /Action complete/;

                # Only report the deletion once the wiki has confirmed it
                logAdd($success
                    ? "Deleted Image:$imagename"
                    : "Error: Could not delete $imagename");
            }
        }
        return $success;
    }
    # Download an uploaded file by name from a wiki to a local file
    # - if no namespace is supplied with the source then "Image" is used
    # - if no destination filename is specified, the file name found in the
    #   download URL is used
    # - returns 1 when the file was written, 0 otherwise
    # - NOTE(review): the response body is subject to whatever size limit the
    #   shared $::client is configured with - confirm for large files
    sub wikiDownloadFile {
        my ($wiki, $src, $dst) = @_;

        # A title that already has a namespace is used as given (the
        # namespace used to be prepended a second time)
        $src = "Image:$src" unless $src =~ /^.+?:.+$/;

        my ($base) = $wiki =~ /(https?:\/\/(.+?))\//;
        return 0 unless defined $base;

        # The file's own URL is taken from the first hashed upload path
        # (/x/xx/name) linked on the rendered description page
        my $page = $::client->get("$wiki?title=$src&action=render")->content;
        my ($url) = $page =~ /href\s*=\s*['"](\/[^"']+?\/.\/..\/[^'"]+?)["']/;
        return 0 unless $url;

        unless ($dst) {
            ($dst) = $url =~ /.+\/(.+?)$/;
            $dst = 'wiki-downloaded-file' unless defined $dst;
        }

        logAdd("Downloading \"$src\"");
        my $response = $::client->get("$base$url");
        return 0 unless $response->is_success;

        open my $out, '>', $dst or do {
            logAdd("ERROR: can't write \"$dst\": $!");
            return 0;
        };
        binmode $out;
        print {$out} $response->content;
        close $out or return 0;
        return 1;
    }
    # Download all uploaded files from a wiki to a local directory
    # - to a maximum of 500 images (one page of Special:Imagelist)
    # - $dir defaults to the wiki's host name, or "wiki-downloaded-files" if
    #   none can be derived (a supplied $dir used to be overwritten)
    # - returns the number of files written
    sub wikiDownloadFiles {
        my ($wiki, $dir) = @_;
        my ($base, $host) = $wiki =~ /(https?:\/\/(.+?))\//;
        unless (defined $base) {
            logAdd("ERROR: can't work out the base URL of $wiki");
            return 0;
        }
        $dir = $host unless $dir;
        $dir = 'wiki-downloaded-files' unless $dir;

        my $list  = $::client->get("$wiki?title=Special:Imagelist&limit=500")->content;
        my @files = $list =~ /href\s*=\s*['"](\/[^"']+?\/.\/..\/[^'"]+?)["']/g;
        mkdir $dir;    # failure is ignored: the directory may already exist

        my $count = 0;
        for my $url (@files) {
            my ($file) = $url =~ /.+\/(.+?)$/;
            next unless $file;
            logAdd("Downloading \"$file\"");
            my $response = $::client->get("$base$url");
            next unless $response->is_success;
            open my $out, '>', "$dir/$file" or do {
                logAdd("ERROR: can't write \"$dir/$file\": $!");
                next;
            };
            binmode $out;
            print {$out} $response->content;
            close $out;
            $count++;
        }
        return $count;
    }
    # Change protection state of an article
    # - relevant from 1.8+. From 1.12+ so may as well use API
    # - see http://www.mediawiki.org/wiki/API:Edit_-_Protect
    # - we need this working so that we can use a bot to change #security annotations to protection when SS4 ready
    # - returns true when the protection form and its edit token were found;
    #   the result of the POST itself is not examined
    sub wikiProtect {

        # Standard way first, use API later with wikiGetVersion check
        my (
            $wiki, $title, $comment,
            $restrictions,    # hashref of action=group pairs
            $expiry,          # optional expiry date string
            $cascade,         # optional boolean for cascading restrictions over transcluded articles
        ) = @_;
        $restrictions = { "edit" => "", "move" => "" } if not $restrictions;

        # A list of defaults which could be used in usage logAdd reporting
        # my $defaults = {
        #     "(default)"                => "",
        #     "block unregistered users" => "autoconfirmed",
        #     "Sysops only"              => "sysop"
        # };
        my $url     = "$wiki?title=$title&action=protect";
        my $success = 0;
        my $retries = 1;
        while ($retries--) {
            my $response = $::client->get($url);
            my $page     = $response->content;
            if ($response->is_success
                and $page =~ m/Confirm protection.+?The action you have requested is limited to users in the group/s)
            {
                logAdd("$user does not have permission to protect $title");
                return ($success);
            }
            if ($response->is_success
                and $page =~ m/Confirm protection.+?You may view and change the protection level here for the page/s)
            {
                # The post below doesn't return content, so success is
                # judged on finding the edit token
                my ($token) = $page =~ m/<input.+?name="wpEditToken".+?value="(.*?)"/s;
                $success = defined $token ? 1 : 0;
                unless ($success) {
                    logAdd("ERROR: no edit token found on the protection form of $title");
                    next;
                }
                my %form = (
                    "wpEditToken"      => $token,
                    "mwProtect-expiry" => $expiry  || "",
                    "mwProtect-reason" => $comment || "",
                );
                $form{"mwProtect-level-$_"} = $restrictions->{$_} for keys %{$restrictions};

                # Allowing for cascade option (string comparison: "==" was
                # true for any non-numeric group name)
                if ($cascade && ($restrictions->{'edit'} || '') eq "sysop") {
                    $form{"mwProtect-cascade"} = 1;
                }
                $response = $::client->post($url, \%form);
                logAdd("Setting protect article permissions");
                logHash(\%form);
            }
        }
        return $success;
    }
    # Replace parameters in a template call using examineBraces
    # (done) - allow for no param hash which would result in {{template}}
    # - account for both templates or parser-functions, i.e. {{foo|bar=baz}} or {{#foo:args...}}
    # - allow for multiple templates of same name by matching first param, then second etc
    # - e.g.
    #   wikiUpdateTemplate( $wiki, $title, "#foo", { 'id' => 123, 'bar' => 'baz' } )
    #   if two #foo calls exist, then only one having an "id" param equal to 123 would be updated
    #   if two have such an id, then the comparison would resort to the second arg and so on
    #   if this process cannot result in an unambiguous update it should fail with an error saying so
    #
    # Current behaviour:
    # - a single call of $template is rewritten with $params and the page saved
    # - with several calls, $params must be a non-empty hashref and the first
    #   key/value pair of $ambig must select exactly one of them; otherwise
    #   nothing is changed
    # - returns the result of wikiEdit, or 0 when nothing was edited
    sub wikiUpdateTemplate {
        my (
            $wiki, $title,
            $template,    # Name of template to update
            $params,      # hashref of param/value pairs to update the template with
            $ambig,       # hashref whose first key/value pair picks one call among several
            $comment,
            $minor
        ) = @_;
        my $success = 0;
        $template ||= "template";
        my $wtext = wikiRawPage($wiki, $title);
        return $success unless defined $wtext;

        # Use examine braces to get all closed calls of the wanted name
        my @matches =
            grep { $_->{'NAME'} eq $template && defined $_->{'LENGTH'} } examineBraces($wtext);
        return $success unless @matches;    # no braces of matching name

        my @brace;
        if (@matches == 1) {                # single match
            @brace = @matches;
        }
        else {                              # ambiguous
            return $success unless ref($params) eq "HASH" && %$params;    # no params

            # Check $ambig is in instances of $template
            my ($ambkey) = ref($ambig) eq "HASH" ? keys %{$ambig} : ();
            if (defined $ambkey) {
                my $ambvalue = defined $ambig->{$ambkey} ? $ambig->{$ambkey} : '';
                @brace = grep {
                    substr($wtext, $_->{'OFFSET'}, $_->{'LENGTH'})
                        =~ m/\Q$ambkey\E\s*=\s*\Q$ambvalue\E/
                } @matches;
            }
            if (@brace != 1) {              # none, or still more than one
                logAdd("Aborting ambiguous parameter match found");
                return $success;
            }
        }

        # Build the replacement call; a parser-function name ends in ":" and
        # takes its first argument without a leading "|"
        my $name      = $brace[0]->{'NAME'};
        my $sep       = ($name =~ /:$/) ? "" : "|";
        my $newparams = '{{' . $name;
        if (ref($params) eq "HASH") {
            foreach my $key (sort keys %$params) {
                $newparams .= "${sep}$key=$params->{$key}";
                $sep = "|";
            }
        }
        $newparams .= '}}';

        # Update template content in article
        substr($wtext, $brace[0]->{'OFFSET'}, $brace[0]->{'LENGTH'}, $newparams);
        $success = wikiEdit($wiki, $title, $wtext, $comment, $minor);
        return $success;
    }
    # Return information on brace-structure in passed wikitext
    # - returns a list of hashrefs, one per "{{" found, in order of appearance
    # - NAME   is the identifier after "{{" (letters, digits, "_", "#", with
    #          an optional trailing ":")
    # - OFFSET is the position of the opening "{{"
    # - LENGTH (from "{{" up to and including the matching "}}") and
    #   DEPTH  (1 for an outermost call) are only set once the call is closed
    # - a "}}" with no open call is ignored
    sub examineBraces {
        my $content = shift;
        my @braces  = ();
        my @depths  = ();
        my $depth   = 0;
        return @braces unless defined $content;
        while ($content =~ m/\G.*?(\{\{\s*([#a-z0-9_]*:?)|\}\})/sig) {

            # Start of the "{{" or "}}" itself; computing it from the name
            # length was wrong when whitespace followed "{{"
            my $offset = $-[1];
            if ($1 eq '}}') {
                next unless $depth > 0;
                my $brace = $braces[ $depths[ $depth - 1 ] ];
                $brace->{LENGTH} = $offset - $brace->{OFFSET} + 2;
                $brace->{DEPTH}  = $depth--;
            }
            else {
                push @braces, { NAME => $2, OFFSET => $offset };
                $depths[ $depth++ ] = $#braces;
            }
        }
        return @braces;
    }
    # Move (rename) page $oldname to $newname using Special:Movepage
    # Form fields used:
    # - wpNewTitle
    # - wpMovetalk (logical checkbox)
    # - wpMove (action=submit)
    # Returns true when the "Move page" form was reached and submitted, 0 when
    # the wiki reports a permissions error; the result of the POST itself is
    # not examined.
    sub wikiMove {
        my ($wiki, $oldname, $newname, $reason, $movetalk) = @_;
        my $url = "$wiki?title=Special:Movepage&target=$oldname";
        logAdd("URL=>$url");
        my $success = 0;
        my $retries = 1;
        while ($retries--) {
            my $response = $::client->get($url);
            my $page     = $response->content;

            # Todo: Need to catch output where user does not have move privileges
            # Permissions Errors
            # You must be a registered user and logged in to move a page
            # Special:Movepage seems to move any non-existent page then throw the message after posting;
            # This action cannot be performed on this page
            # <input type="hidden" value="095485e50db577baa80c407d0e032e43+\" name="wpEditToken"/>
            #### Interesting 'value' and 'name' can be reversed, and single or double quoted
            if ($response->is_success
                && $page =~ m/<h1 class="firstHeading">Permissions Errors<\/h1>/)
            {
                logAdd("User $user does not have permissions to move $oldname");
                return 0;
            }
            if ($response->is_success
                && $page =~ m/<h1 class="firstHeading">Move page<\/h1>/)
            {
                $success = $response->is_success;
                my ($token) =
                    $page =~ m/<input.+?name=['"]wpEditToken["'].+?value=['"](.*?)["']/s;
                my %form = (
                    wpEditToken => $token,
                    wpNewTitle  => $newname,
                    wpReason    => $reason || "",
                    wpMovetalk  => $movetalk || "",
                );
                $response = $::client->post("$url&action=submit", \%form);
                logAdd("Moving $oldname to $newname");
            }
        }
        return $success;
    }
    # See http://www.organicdesign.co.nz/MediaWiki_code_snippets
    # Return information on the brace-structure of the passed wikitext.
    # - returns a list of hashrefs, one per "{{" found, in order of appearance
    # - NAME   is the identifier after "{{" (letters, digits, "_", "#", with
    #          an optional trailing ":")
    # - OFFSET is the position of the opening "{{"
    # - LENGTH (from "{{" up to and including the matching "}}") and
    #   DEPTH  (1 for an outermost call) are only set once the call is closed
    # - a "}}" with no open call is ignored
    sub wikiExamineBraces {
        my $content = shift;
        my @braces  = ();
        my @depths  = ();
        my $depth   = 0;
        return @braces unless defined $content;
        while ($content =~ m/\G.*?(\{\{\s*([#a-z0-9_]*:?)|\}\})/sig) {

            # Start of the "{{" or "}}" itself; computing it from the name
            # length was wrong when whitespace followed "{{"
            my $offset = $-[1];
            if ($1 eq '}}') {
                next unless $depth > 0;
                my $brace = $braces[ $depths[ $depth - 1 ] ];
                $brace->{LENGTH} = $offset - $brace->{OFFSET} + 2;
                $brace->{DEPTH}  = $depth--;
            }
            else {
                push @braces, { NAME => $2, OFFSET => $offset };
                $depths[ $depth++ ] = $#braces;
            }
        }
        return @braces;
    }
    # Create a GUID article title compatible with the RecordAdmin extension
    # - format is the local date as YYYYMMDD, a dash, then five random
    #   characters, each an upper-case letter (about 72% of the time) or a digit
    # - the value is built in a lexical variable instead of the package
    #   global $guid
    sub wikiGuid {
        my $guid = strftime('%Y%m%d', localtime) . '-';
        $guid .= chr(rand() < .72 ? int(rand(26) + 65) : int(rand(10) + 48)) for 1 .. 5;
        return $guid;
    }