Difference between revisions of "Import CSV data into a wiki"
m (bug fix) |
(Start adding template fetching and parsing ability) |
||
Line 1: | Line 1: | ||
#!/usr/bin/perl | #!/usr/bin/perl | ||
− | # | + | # |
# - Licenced under LGPL (http://www.gnu.org/copyleft/lesser.html) | # - Licenced under LGPL (http://www.gnu.org/copyleft/lesser.html) | ||
# - Author: http://www.organicdesign.co.nz/nad | # - Author: http://www.organicdesign.co.nz/nad | ||
# - Source: http://www.organicdesign.co.nz/scraper.pl | # - Source: http://www.organicdesign.co.nz/scraper.pl | ||
# - Started: 2008-03-21 | # - Started: 2008-03-21 | ||
− | + | ||
require('wiki.pl'); | require('wiki.pl'); | ||
− | + | ||
# Job, log and error files | # Job, log and error files | ||
$ARGV[0] or die "No job file specified!"; | $ARGV[0] or die "No job file specified!"; | ||
$ARGV[0] =~ /^(.+?)(\..+?)?$/; | $ARGV[0] =~ /^(.+?)(\..+?)?$/; | ||
+ | |||
+ | # Set a debug conditional | ||
+ | $::debug = 1; | ||
+ | |||
$::log = "$1.log"; | $::log = "$1.log"; | ||
$::err = "$1.err"; | $::err = "$1.err"; | ||
Line 16: | Line 20: | ||
$::title = 0; | $::title = 0; | ||
$::template = 'Record'; | $::template = 'Record'; | ||
− | + | ||
# Parse the job file | # Parse the job file | ||
if (open JOB,'<',$ARGV[0]) { | if (open JOB,'<',$ARGV[0]) { | ||
Line 30: | Line 34: | ||
close JOB; | close JOB; | ||
} else { die "Couldn't parse job file!" } | } else { die "Couldn't parse job file!" } | ||
− | + | ||
# Open CSV file and read in headings line | # Open CSV file and read in headings line | ||
Line 38: | Line 42: | ||
@headings = split /$::sep/i, $1; | @headings = split /$::sep/i, $1; | ||
} else { die "Could not open CSV file!" } | } else { die "Could not open CSV file!" } | ||
− | + | ||
# Log in to the wiki | # Log in to the wiki | ||
wikiLogin($::wiki,$::user,$::pass) or exit; | wikiLogin($::wiki,$::user,$::pass) or exit; | ||
+ | |||
+ | # fetch the template if it exists | ||
+ | $response = $client->get("$wiki?title=$template&action=raw"); | ||
+ | if( $response->is_success ) { | ||
+ | $wikitext = $response->content; | ||
+ | |||
+ | |||
+ | # Remove noinclude areas | ||
+ | $wikitext =~ s/<noinclude>.+?<\/noinclude>//gs; | ||
+ | |||
+ | # Find all unique {{{parameters}}} | ||
+ | # http://en.wikipedia.org/wiki/Help:Templates#Parameters | ||
+ | |||
+ | while ($wikitext =~ m/\{{{(.+?)}}}/g ) { | ||
+ | $key = $1; | ||
+ | # Remove default anchors if there are any | ||
+ | $key =~ s/\|.*//; | ||
+ | $params{$key} = undef; | ||
+ | } | ||
+ | |||
+ | if($::debug) { | ||
+ | print "@headings"; | ||
+ | print "\n\n\n|@{[%params]}|\n\n\n"; | ||
+ | die "[\$::debug set exiting]\n" ; | ||
+ | } | ||
+ | } | ||
+ | |||
# Get batch size and current number (also later account for n-bots) | # Get batch size and current number (also later account for n-bots) | ||
− | + | ||
# todo: log batch start | # todo: log batch start | ||
− | + | ||
# Process the records | # Process the records | ||
$n = 1; | $n = 1; | ||
Line 55: | Line 86: | ||
$tmpl .= "}}"; | $tmpl .= "}}"; | ||
print "Processing record ".$n++."\n"; | print "Processing record ".$n++."\n"; | ||
− | + | ||
# Update the record | # Update the record | ||
$text = wikiRawPage($::wiki,$record[$::title],0); | $text = wikiRawPage($::wiki,$record[$::title],0); | ||
$text .= "\n$tmpl" unless $text =~ s/\{\{$template.+?\}\}/$tmpl/is; | $text .= "\n$tmpl" unless $text =~ s/\{\{$template.+?\}\}/$tmpl/is; | ||
$done = wikiPageEdit($::wiki,$record[$::title],$text,"$template updated by csv2wiki.pl"); | $done = wikiPageEdit($::wiki,$record[$::title],$text,"$template updated by csv2wiki.pl"); | ||
− | + | ||
# log a row error if any | # log a row error if any | ||
} | } | ||
− | + | ||
close CSV; | close CSV; |
Revision as of 12:25, 28 May 2008
#!/usr/bin/perl
# - Licenced under LGPL (http://www.gnu.org/copyleft/lesser.html)
# - Author: http://www.organicdesign.co.nz/nad
# - Source: http://www.organicdesign.co.nz/scraper.pl
# - Started: 2008-03-21
require('wiki.pl');
# Job, log and error files
# The first command-line argument names the job file. The log and error
# file names reuse that name with its extension removed.
$ARGV[0] or die "No job file specified!";
my ($job_base) = $ARGV[0] =~ /^(.+?)(\..+?)?$/;

# Set a debug conditional
$::debug = 1;

# Log/error file names, followed by the defaults for separator, title
# column and template name
$::log      = "$job_base.log";
$::err      = "$job_base.err";
$::sep      = ',';
$::title    = 0;
$::template = 'Record';
# Parse the job file
# Each recognised line has the form "key: value", optionally preceded by
# "*". Keys are matched case-insensitively and lines with any other key are
# ignored. The separator value must be wrapped in double quotes.
if (open my $job_fh, '<', $ARGV[0]) {

    # Settings whose value is simply the trimmed remainder of the line
    my %setting_for = (
        csv      => \$::csv,
        wiki     => \$::wiki,
        user     => \$::user,
        pass     => \$::pass,
        title    => \$::title,
        template => \$::template,
    );

    # Read line by line instead of slurping the whole file into a list
    while (my $line = <$job_fh>) {
        if ($line =~ /^\*?\s*separator\s*:\s*"(.+?)"\s*$/i) {
            $::sep = $1;
        }
        elsif ($line =~ /^\*?\s*(\w+)\s*:\s*(.+?)\s*$/ and exists $setting_for{lc $1}) {
            ${ $setting_for{lc $1} } = $2;
        }
    }
    close $job_fh;
}
else {
    # Report which file failed and why, not just that something went wrong
    die "Couldn't parse job file! ($ARGV[0]: $!)";
}
# Open CSV file and read in headings line
# The CSV handle is deliberately left open: the record loop further down
# reads the remaining lines from it and closes it.
if (open CSV, '<', $::csv) {
    my $heading_line = <CSV>;

    # An empty file or blank first line must stop the run. Without this
    # check $1 would still hold the capture of an earlier, unrelated match
    # and that stale text would be used as the headings.
    defined($heading_line) && $heading_line =~ /^\s*(.+?)\s*$/
        or die "CSV file has no headings line!";

    # $::sep is interpolated as a regular expression, not a literal string.
    # No /i here, so headings are split exactly like the data rows.
    @headings = split /$::sep/, $1;
}
else {
    die "Could not open CSV file! ($::csv: $!)";
}
# Log in to the wiki
# A failed login ends the run with a non-zero exit status so that a calling
# shell script or cron job can tell it apart from a successful import.
wikiLogin($::wiki,$::user,$::pass) or exit 1;
# Fetch the template if it exists and collect its parameter names
# NOTE(review): $client is not defined in this script; presumably it is the
# HTTP user agent set up by wiki.pl -- confirm.
# NOTE(review): the template title is placed in the URL without escaping;
# titles containing spaces or '&' would need URL-encoding -- confirm.
$response = $client->get("$::wiki?title=$::template&action=raw");
if ($response->is_success) {
    $wikitext = $response->content;

    # Remove noinclude areas. ".*?" (not ".+?") so that an empty
    # <noinclude></noinclude> pair is removed on its own instead of the
    # match running on to a later closing tag.
    $wikitext =~ s/<noinclude>.*?<\/noinclude>//gs;

    # Find all unique {{{parameters}}}
    # http://en.wikipedia.org/wiki/Help:Templates#Parameters
    # Every literal brace is escaped, and /s lets a parameter whose default
    # value spans several lines be found as well.
    while ($wikitext =~ m/\{\{\{(.+?)\}\}\}/gs) {
        $key = $1;
        # Remove default anchors if there are any (including multi-line ones)
        $key =~ s/\|.*//s;
        # Only the keys matter; the hash is used as a set of names
        $params{$key} = undef;
    }

    # While debugging, dump the headings and parameter names and stop
    # before any page is edited
    if ($::debug) {
        print "@headings";
        print "\n\n\n|@{[%params]}|\n\n\n";
        die "[\$::debug set exiting]\n";
    }
}
# Get batch size and current number (also later account for n-bots)

# todo: log batch start

# Process the records
# For every remaining CSV line, build a {{template}} call from the headings
# and the row's values, then replace the existing call on the target page
# or append the new one if the page has none.
$n = 1;
while (<CSV>) {

    # Skip blank lines. Without this check a failed match would leave $1
    # holding stale text from an earlier match, which would then be
    # submitted as if it were this row.
    /^\s*(.+?)\s*$/ or next;
    @record = split /$::sep/, $1;

    # One "|heading = value" line per CSV column
    $tmpl  = "{{$template\n";
    $tmpl .= "|$headings[$_] = $record[$_]\n" for 0..$#headings;
    $tmpl .= "}}";
    print "Processing record ".$n++."\n";

    # Update the record
    # The page title is taken from the column selected by $::title.
    $text = wikiRawPage($::wiki,$record[$::title],0);

    # \Q...\E keeps regex metacharacters in the template name literal.
    # NOTE(review): ".+?" stops at the first "}}", so an existing call that
    # contains nested {{...}} would only be partly replaced -- confirm
    # whether that can occur.
    $text .= "\n$tmpl" unless $text =~ s/\{\{\Q$template\E.+?\}\}/$tmpl/is;
    $done = wikiPageEdit($::wiki,$record[$::title],$text,"$template updated by csv2wiki.pl");

    # todo: log a row error if any
}

close CSV;