Difference between revisions of "Import CSV data into a wiki"
(Bug, should be accessing Template: based on /^\*?\s*template\s*:\s*(.+?)\s*$/i) |
(Add prefix - (redundant local $/="|"; can be removed)) |
||
| Line 21: | Line 21: | ||
$::title = 0; | $::title = 0; | ||
$::template = 'Record'; | $::template = 'Record'; | ||
| + | $::prefix = ""; | ||
# Parse the job file | # Parse the job file | ||
| Line 32: | Line 33: | ||
if (/^\*?\s*title\s*:\s*(.+?)\s*$/i) { $::title = $1 } | if (/^\*?\s*title\s*:\s*(.+?)\s*$/i) { $::title = $1 } | ||
if (/^\*?\s*template\s*:\s*(.+?)\s*$/i) { $::template = $1 } | if (/^\*?\s*template\s*:\s*(.+?)\s*$/i) { $::template = $1 } | ||
| + | if (/^\*?\s*prefix\s*:\s*(.+?)\s*$/i) { $::prefix = $1 } | ||
} | } | ||
close JOB; | close JOB; | ||
| Line 93: | Line 95: | ||
$tmpl = "{{$template\n"; | $tmpl = "{{$template\n"; | ||
# JUST NEED TO FIX UP HERE | # JUST NEED TO FIX UP HERE | ||
| + | local $/="|"; | ||
$tmpl .= "|$headings[$_] = $record[$_]\n" for 0..$#headings; | $tmpl .= "|$headings[$_] = $record[$_]\n" for 0..$#headings; | ||
$tmpl .= "}}"; | $tmpl .= "}}"; | ||
Revision as of 05:16, 29 May 2008
#!/usr/bin/perl
# Our Perl scripts.{{#security:*|sysop}}Automated scripts to perform batch automation.
# - Licenced under LGPL (http://www.gnu.org/copyleft/lesser.html)
# - Author: http://www.organicdesign.co.nz/nad
# - Source: http://www.organicdesign.co.nz/scraper.pl
# - Started: 2008-03-21
# - API: http://en.wikipedia.org/w/api.php
require('wiki.pl');
# Job, log and error files
# The first command-line argument names the job file; the script cannot run
# without it.
$ARGV[0] or die "No job file specified!";

# The log and error files sit next to the job file and share its base name:
# the job path with everything from the first dot of its *file name* removed
# ("jobs/import.job" -> "jobs/import"). The directory part is matched
# separately so that dots in it ("../jobs.d/import.job") are never mistaken
# for the start of the extension.
my ($job_base) = $ARGV[0] =~ m{^((?:.*[/\\])?[^/\\]+?)(?:\.[^/\\]+)?$};

# Fall back to the argument itself if it does not look like a file path
# (e.g. it ends in a directory separator).
$job_base = $ARGV[0] unless defined $job_base;

# Set a debug conditional
$::debug = 0;

# Defaults; each may be overridden by the matching line of the job file.
$::log      = "$job_base.log";
$::err      = "$job_base.err";
$::sep      = ',';         # CSV field separator (used as a split pattern)
$::title    = 0;           # index of the CSV column used as the page title
$::template = 'Record';    # name of the template the records are written into
$::prefix   = "";
# Parse the job file
# Read the job file named by $ARGV[0] one line at a time. A line of the form
# "key: value" (optionally preceded by a "*") sets the matching setting; keys
# are case-insensitive and surrounding whitespace is ignored. The separator
# value must be enclosed in double quotes. All other lines are ignored.
# Dies if the job file cannot be opened.
{
    # Job-file key => the global it sets.
    my %setting_for = (
        csv       => \$::csv,
        wiki      => \$::wiki,
        user      => \$::user,
        pass      => \$::pass,
        separator => \$::sep,
        title     => \$::title,
        template  => \$::template,
        prefix    => \$::prefix,
    );

    open my $job_fh, '<', $ARGV[0]
        or die "Couldn't parse job file! ($ARGV[0]: $!)";

    while (my $line = <$job_fh>) {
        next unless $line =~ /^\*?\s*(\w+)\s*:\s*(.+?)\s*$/;
        my ($key, $value) = (lc $1, $2);

        my $setting = $setting_for{$key} or next;

        # The separator is only accepted when quoted, so that whitespace
        # separators survive the trimming above; the quotes are dropped.
        if ($key eq 'separator') {
            next unless $value =~ /^"(.+)"$/;
            $value = $1;
        }

        $$setting = $value;
    }

    close $job_fh;
}
# Open CSV file and read in headings line
# Open the CSV file named by $::csv on the CSV filehandle (left open so the
# records can be read from it afterwards) and split its first line into
# @headings using $::sep as the split pattern.
#
# An undefined file name is rejected explicitly: a three-argument open() with
# an undefined name would otherwise succeed on an anonymous temporary file.
die "Could not open CSV file! (no csv file named in the job file)"
    unless defined $::csv && length $::csv;

open(CSV, '<', $::csv) or die "Could not open CSV file! ($::csv: $!)";

# The headings line must exist and contain something; a failed match would
# leave $1 holding the capture of some earlier, unrelated match.
$_ = <CSV>;
die "CSV file $::csv has no headings line!"
    unless defined $_ && /^\s*(.+?)\s*$/;

# Split exactly as the record lines are split (case-sensitively) so that
# headings and record fields always line up column for column.
@headings = split /$::sep/, $1;
# Log in to the wiki
# Log in with the credentials from the job file. Nothing can be imported
# without a session, so a failed login ends the run - with a message and a
# non-zero exit status, so that calling scripts can tell it did not succeed.
wikiLogin($::wiki,$::user,$::pass)
    or die "Could not log in to $::wiki as $::user\n";
# Fetch the template if it exists
# Fetch the raw wikitext of Template:<$::template> and, if the request
# succeeds, collect the names of the {{{parameters}}} it uses in %params and
# record in @cols the index of every CSV heading that is one of those names.
# With $::debug set, the collected data is printed and the script stops.
$response = $client->get("$::wiki?title=Template:$::template&action=raw");
if ($response->is_success) {
    $wikitext = $response->content;

    # Remove noinclude areas. The pattern is anchored on the opening tag so
    # that only the text between <noinclude> and </noinclude> is dropped.
    $wikitext =~ s/<noinclude>.*?<\/noinclude>//gs;

    # Find all unique {{{parameters}}}
    # http://en.wikipedia.org/wiki/Help:Templates#Parameters
    # Every brace is escaped so none can be read as a regex quantifier.
    while ($wikitext =~ m/\{\{\{(.+?)\}\}\}/g) {
        my $key = $1;

        # Remove default anchors if there are any ({{{name|default}}})
        $key =~ s/\|.*//;
        $params{$key} = undef;
    }

    # Note which @headings elements are identical to keys(%params)
    for my $i (0 .. $#headings) {
        if (exists $params{$headings[$i]}) {
            print "$headings[$i] matches!\n";
            push @cols, $i;
        }
    }

    if ($::debug) {
        print "\@A: @headings\n";
        print "%H: @{[%params]}\n";
        print "\@cols @cols\n";
        die "[\$::debug set exiting]\n";
    }
}
# Get batch size and current number (also later account for n-bots)
# TODO: log batch start
# Process the records
# Process the remaining lines of the CSV file, one record per line. For each
# record a "{{<template> |heading = value ... }}" call is built from
# @headings and the record's fields; the page named by the record's
# $::title column is then fetched, its existing call to the template is
# replaced (or the new call appended if there is none) and the page is saved.
#
# NOTE(review): $::prefix is read from the job file but is not applied to the
# page title here - confirm whether titles are meant to be prefixed.
$n = 1;
while (<CSV>) {

    # Skip blank lines: a failed match would leave $1 holding the capture of
    # an earlier, unrelated match and produce a bogus record.
    next unless /^\s*(.+?)\s*$/;
    @record = split /$::sep/, $1;

    # Build the template call for this record. The input record separator
    # ($/) is deliberately left alone so the wiki helpers called below see
    # the normal line-oriented setting.
    $tmpl  = "{{$::template\n";
    $tmpl .= "|$headings[$_] = $record[$_]\n" for 0..$#headings;
    $tmpl .= "}}";
    print "Processing record ".$n++."\n";

    # Update the record. The template name is quoted (\Q..\E) so that any
    # regex metacharacters in it are matched literally.
    $text = wikiRawPage($::wiki,$record[$::title],0);
    $text .= "\n$tmpl" unless $text =~ s/\{\{\Q$::template\E.+?\}\}/$tmpl/is;
    $done = wikiPageEdit($::wiki,$record[$::title],$text,"$::template updated by csv2wiki.pl");

    # TODO: log a row error if any
}
close CSV;



