Difference between revisions of "Import CSV data into a wiki"
(remove protection) |
(tidy up) |
||
Line 39: | Line 39: | ||
if (/^\*?\s*prefix\s*:\s*(.+?)\s*$/i) { $::prefix = $1 } | if (/^\*?\s*prefix\s*:\s*(.+?)\s*$/i) { $::prefix = $1 } | ||
if (/^\*?\s*append\s*:\s*(.+?)\s*$/i) { $::append = $1 } | if (/^\*?\s*append\s*:\s*(.+?)\s*$/i) { $::append = $1 } | ||
− | + | } | |
close JOB; | close JOB; | ||
} else { die "Couldn't parse job file!" } | } else { die "Couldn't parse job file!" } | ||
Line 45: | Line 45: | ||
# Open CSV file and read in headings line | # Open CSV file and read in headings line | ||
− | if (open CSV,'<',$::csv) { | + | if (open CSV, '<', $::csv) { |
$_ = <CSV>; | $_ = <CSV>; | ||
/^\s*(.+?)\s*$/; | /^\s*(.+?)\s*$/; | ||
Line 56: | Line 56: | ||
# fetch the template if it exists | # fetch the template if it exists | ||
$response = $client->get("$wiki?title=Template:$template&action=raw"); | $response = $client->get("$wiki?title=Template:$template&action=raw"); | ||
− | if( $response->is_success ) { | + | if ($response->is_success) { |
− | + | $wikitext = $response->content; | |
− | + | # Remove noinclude areas | |
− | + | $wikitext =~ s/<noinclude>.+?<\/noinclude>//gs; | |
− | + | # Find all unique {{{parameters}}} | |
− | + | # http://en.wikipedia.org/wiki/Help:Templates#Parameters | |
− | + | $params{$1} = undef while $wikitext =~ /\{{3}(.+?)(\|.*?)?\}{3}/g; | |
− | |||
− | |||
− | + | # Create %{param=index} hash | |
− | + | foreach ($i = 0; $i <= $#headings; $i++) { | |
− | + | $params{$headings[$i]} = $i if exists $params{$headings[$i]}; | |
− | + | } | |
− | |||
− | |||
− | + | if ($::debug) { | |
− | + | print "\@headings: @headings\n"; | |
− | + | print "%params: @{[%params]}\n"; | |
− | + | } | |
} | } | ||
Line 91: | Line 87: | ||
/^\s*(.+?)\s*$/; | /^\s*(.+?)\s*$/; | ||
@record = split /$::sep/, $1; | @record = split /$::sep/, $1; | ||
− | $tmpl | + | $tmpl = "{{$template\n"; |
− | $tmpl | + | $tmpl .= "|$_ = $record[$params{$_}]\n" foreach keys %params; |
− | $tmpl | + | $tmpl .= "}}"; |
print "Processing record ".$n++."\n"; | print "Processing record ".$n++."\n"; | ||
− | if($::debug) { | + | if ($::debug) { |
print "\$tmpl = $tmpl\n"; | print "\$tmpl = $tmpl\n"; | ||
die "[\$::debug set exiting]\n" ; | die "[\$::debug set exiting]\n" ; | ||
} | } | ||
+ | |||
# Update the record | # Update the record | ||
$text = wikiRawPage($::wiki,$record[$::title],0); | $text = wikiRawPage($::wiki,$record[$::title],0); | ||
$text .= "\n$tmpl" unless $text =~ s/\{\{$template.+?\}\}/$tmpl/is; | $text .= "\n$tmpl" unless $text =~ s/\{\{$template.+?\}\}/$tmpl/is; | ||
− | if($append) { | + | if ($append) { |
− | $done | + | $done = wikiPageAppend( |
+ | $::wiki, | ||
+ | $::prefix . $record[$::title], | ||
+ | $text, | ||
+ | "[[Template:$::template|$::template]] appended using csv2wiki.pl" | ||
+ | ); | ||
} else { | } else { | ||
− | $done | + | $done = wikiPageEdit( |
+ | $::wiki, | ||
+ | $::prefix . $record[$::title], | ||
+ | $text, | ||
+ | "[[Template:$::template|$::template]] replacement using csv2wiki.pl" | ||
+ | ); | ||
} | } | ||
+ | |||
# log a row error if any | # log a row error if any | ||
} | } | ||
close CSV; | close CSV; |
Revision as of 07:36, 4 September 2008
#!/usr/bin/perl
# Our Perl scripts. Automated scripts to perform batch automation.
# - Licenced under LGPL (http://www.gnu.org/copyleft/lesser.html)
# - Authors: Nad Sven
# - Source: http://www.organicdesign.co.nz/scraper.pl
# - Started: 2008-03-21
# - API: http://en.wikipedia.org/w/api.php
# Todo
# - Make it so that if there is no title then it increments
# - $hashref = { $wikitext =~ /\{{3}(.+?)(\|.*?)?\}{3}/g }
require('wiki.pl');
# Job, log and error files
# The job file is the first command-line argument; without it there is nothing to do.
$ARGV[0] or die "No job file specified!";

# Base name of the job file: the shortest leading part that can be followed
# by an optional dot-suffix running to the end of the name.
my ($job_base) = $ARGV[0] =~ /^(.+?)(\..+?)?$/;

# Set a debug conditional
$::debug = 0;

# Log and error files sit next to the job file and share its base name.
$::log = "$job_base.log";
$::err = "$job_base.err";

# Defaults for the settings a job file may override.
$::sep      = ',';
$::title    = 0;
$::template = 'Record';
$::prefix   = "";
$::append   = 0;
# Parse the job file
# Each setting is a line of the form "key: value", optionally preceded by "*".
# Keys are matched case-insensitively and surrounding whitespace is trimmed.
# The separator value is only accepted when wrapped in double quotes.
# Lines with an unknown key are ignored.
my %setting_for = (
    csv      => \$::csv,
    wiki     => \$::wiki,
    user     => \$::user,
    pass     => \$::pass,
    title    => \$::title,
    template => \$::template,
    prefix   => \$::prefix,
    append   => \$::append,
);

open(my $job_fh, '<', $ARGV[0])
    or die "Couldn't open job file '$ARGV[0]': $!";

# Read line by line rather than loading the whole file into a list first.
while (my $job_line = <$job_fh>) {

    # The separator is the one setting with its own (quoted) syntax.
    if ($job_line =~ /^\*?\s*separator\s*:\s*"(.+?)"\s*$/i) {
        $::sep = $1;
        next;
    }

    # Generic "key: value" line; store the value if the key is a known setting.
    if ($job_line =~ /^\*?\s*(\w+)\s*:\s*(.+?)\s*$/) {
        my $target = $setting_for{lc $1};
        $$target = $2 if $target;
    }
}
close $job_fh;
# Open CSV file and read in headings line
# The CSV handle stays open: the record loop further down reads the
# remaining lines from it.
open(CSV, '<', $::csv)
    or die "Could not open CSV file '$::csv': $!";

# The first line holds the column headings; an empty file has none.
my $heading_line = <CSV>;
unless (defined($heading_line) && $heading_line =~ /^\s*(.+?)\s*$/) {
    die "CSV file '$::csv' has no headings line!";
}

# Split exactly the way the record lines are split (no /i), so headings and
# records always yield the same columns.
@headings = split /$::sep/, $1;
# Log in to the wiki; stop here when wikiLogin returns a false value.
# NOTE(review): a bare exit ends the script with status 0, so a caller
# cannot tell a failed login from a successful run - confirm this is intended.
unless (wikiLogin($::wiki, $::user, $::pass)) {
    exit;
}
# Fetch the template if it exists
# Builds %params: one key per {{{parameter}}} found in the template source.
# A parameter whose name equals a CSV heading gets that heading's column
# index as its value; all other parameters keep the value undef.
$response = $client->get("$::wiki?title=Template:$::template&action=raw");
if ($response->is_success) {
    $wikitext = $response->content;

    # Remove noinclude areas, so {{{parameters}}} that appear only inside
    # them are not collected. The opening tag must be part of the pattern,
    # otherwise everything before the first closing tag would be removed too.
    $wikitext =~ s/<noinclude>.+?<\/noinclude>//gs;

    # Find all unique {{{parameters}}}
    # http://en.wikipedia.org/wiki/Help:Templates#Parameters
    # The optional second group swallows a "|default" part so it does not
    # become part of the parameter name.
    $params{$1} = undef while $wikitext =~ /\{{3}(.+?)(\|.*?)?\}{3}/g;

    # Create %{param=index} hash
    for my $column (0 .. $#headings) {
        $params{$headings[$column]} = $column if exists $params{$headings[$column]};
    }

    if ($::debug) {
        print "\@headings: @headings\n";
        print "%params: @{[%params]}\n";
    }
}
# Get batch size and current number (also later account for n-bots)
# todo: log batch start

# Process the records
# For every remaining CSV line, build a {{template}} call from the columns
# that correspond to template parameters and write it to the record's page.

# Only parameters that matched a CSV heading carry a column index. The rest
# are undef, and an undef subscript would silently read column 0, so they are
# left out. Ordering by column index keeps the output stable between runs.
my @mapped_params =
    sort { $params{$a} <=> $params{$b} }
    grep { defined $params{$_} }
    keys %params;

$n = 1;
while (my $csv_line = <CSV>) {

    # Skip lines with no content instead of falling through to a stale $1.
    next unless $csv_line =~ /^\s*(.+?)\s*$/;
    @record = split /$::sep/, $1;

    # Build the template call, one "|name = value" line per mapped parameter.
    $tmpl  = "{{$::template\n";
    $tmpl .= "|$_ = $record[$params{$_}]\n" foreach @mapped_params;
    $tmpl .= "}}";

    print "Processing record ".$n++."\n";
    if ($::debug) {
        # Debug mode shows the first generated template and stops.
        print "\$tmpl = $tmpl\n";
        die "[\$::debug set exiting]\n" ;
    }

    # Update the record
    # Read the same page that is written below (prefix included), so the
    # existing template call is replaced in the right page text.
    my $page_title = $::prefix . $record[$::title];
    $text = wikiRawPage($::wiki, $page_title, 0);

    # Replace the first existing call of the template, or add the new call at
    # the end of the text. \Q...\E keeps regex metacharacters in the template
    # name from being interpreted as pattern syntax.
    $text .= "\n$tmpl" unless $text =~ s/\{\{\Q$::template\E.+?\}\}/$tmpl/is;

    if ($::append) {
        # NOTE(review): $text is the full page text at this point - confirm
        # that wikiPageAppend expects that rather than only the new template.
        $done = wikiPageAppend(
            $::wiki,
            $page_title,
            $text,
            "[[Template:$::template|$::template]] appended using csv2wiki.pl"
        );
    } else {
        $done = wikiPageEdit(
            $::wiki,
            $page_title,
            $text,
            "[[Template:$::template|$::template]] replacement using csv2wiki.pl"
        );
    }

    # TODO: log a row error if any ($done is not inspected yet)
}
close CSV;