Difference between revisions of "Import CSV data into a wiki"

From Organic Design wiki
(wikiPageAppend -->wikiAppend)
(moved to svn)
Line 1: Line 1:
#!/usr/bin/perl
+
{{svn|tools|csv2wiki.pl}}
# {{perl}}{{Category:Robots}}{{lowercase}}
 
# - Licensed under LGPL (http://www.gnu.org/copyleft/lesser.html)
 
# - Authors:  [http://www.organicdesign.co.nz/Nad Nad] [http://www.organicdesign.co.nz/Sven Sven]
 
# - Source:  http://www.organicdesign.co.nz/scraper.pl
 
# - Started: 2008-03-21
 
# - API:    http://en.wikipedia.org/w/api.php
 
 
# Todo
 
# Make it so that if there is no title then it increments
 
# $hashref = { $wikitext =~ /\{{3}(.+?)(\|.*?)?\}{3}/g }
 
# Shared wiki helper library. The wikiLogin(), wikiRawPage(), wikiEdit() and
# wikiAppend() calls made further down (and $client) are presumably provided
# by it - TODO confirm against wiki.pl.
require('wiki.pl');

# Job, log and error files
# The job file is the first command-line argument. Test definedness and
# length rather than truth so that a job file literally named "0" is accepted.
(defined $ARGV[0] and length $ARGV[0]) or die "No job file specified!";

# Base name for the log/error files: the job path with everything from the
# first dot of its *last path component* removed ("jobs/a.txt" -> "jobs/a").
# Dots inside directory names are ignored, so "../dir/job.txt" gives
# "../dir/job" rather than ".". The result is captured into a variable at
# once instead of relying on $1 surviving until it is needed.
my ($job_base) = $ARGV[0] =~ m{^((?:.*[/\\])?[^/\\]+?)(?:\.[^/\\]+)?$};

# Paths the pattern cannot split (e.g. a trailing slash) are used unchanged.
$job_base = $ARGV[0] unless defined $job_base;

# Set a debug conditional
# When true, the first generated template is printed and the script stops
# before any page is edited.
$::debug = 0;

# Log and error file names (not used in this script itself; presumably
# consumed by wiki.pl - TODO confirm).
$::log = "$job_base.log";
$::err = "$job_base.err";

# Defaults for the settings a job file may override.
$::sep      = ',';       # field separator, used as a split pattern
$::title    = 0;         # index of the CSV column holding the page title
$::template = 'Record';  # name of the template each record is written as
$::prefix   = "";        # text prepended to the title of the page being saved
$::append   = 0;         # true: use wikiAppend(), false: use wikiEdit()
 
 
# Parse the job file
# Every setting is one line of the form "name: value", optionally preceded by
# a "*" (wiki bullet). Names are case-insensitive, unknown names are ignored
# and a later line overrides an earlier one. The separator is the only
# setting whose value must be wrapped in double quotes.
open(my $job_fh, '<', $ARGV[0])
    or die "Couldn't parse job file! (cannot open '$ARGV[0]': $!)";

# Setting name => variable that receives its value.
my %job_target_for = (
    csv      => \$::csv,
    wiki     => \$::wiki,
    user     => \$::user,
    pass     => \$::pass,
    title    => \$::title,
    template => \$::template,
    prefix   => \$::prefix,
    append   => \$::append,
);

# Read line by line instead of slurping the whole file into a list.
while (my $job_line = <$job_fh>) {
    if ($job_line =~ /^\*?\s*separator\s*:\s*"(.+?)"\s*$/i) {
        $::sep = $1;
    }
    elsif (my ($name, $value) = $job_line =~ /^\*?\s*(\w+)\s*:\s*(.+?)\s*$/) {
        my $target = $job_target_for{lc $name};
        $$target = $value if $target;
    }
}

close $job_fh;

# The separator is later interpolated into a split pattern. A one-character
# separator is always meant literally, but characters such as "|" or "." are
# regex metacharacters and would split in the wrong places, so escape it
# here. Longer separators are left untouched so that regex-style values
# (for example \t) keep working as before.
$::sep = quotemeta $::sep if length($::sep) == 1;
 
 
 
 
# Open CSV file and read in headings line
# The CSV handle stays open: the record loop further down continues reading
# from it and closes it when done.
defined $::csv
    or die "Could not open CSV file! (no csv setting found in the job file)";

open(CSV, '<', $::csv)
    or die "Could not open CSV file! (cannot open " . $::csv . ": $!)";

# The first line holds the column headings. Capture the trimmed text straight
# into a variable: after a failed match (empty file or blank first line) $1
# would still hold the result of some earlier, unrelated match.
my $heading_row = <CSV>;
my ($heading_text) = defined $heading_row
    ? ($heading_row =~ /^\s*(.+?)\s*$/)
    : ();
defined $heading_text
    or die "CSV file has no headings line! (" . $::csv . ")";

# @headings is a package variable on purpose - later code maps template
# parameters to positions in this list.
@headings = split /$::sep/i, $heading_text;
 
 
# Log in to the wiki
# Leave with a non-zero status when the login fails so that a calling shell
# or cron job can tell the import did not run (a bare "exit" reports success).
wikiLogin($::wiki, $::user, $::pass) or exit 1;

# fetch the template if it exists
# $client is not defined in this script; presumably wiki.pl sets it up as an
# HTTP user agent - TODO confirm.
$response = $client->get("$::wiki?title=Template:$::template&action=raw");

if ($response->is_success) {
    $wikitext = $response->content;

    # Remove noinclude areas
    $wikitext =~ s/<noinclude>.+?<\/noinclude>//gs;

    # Find all unique {{{parameters}}}
    # http://en.wikipedia.org/wiki/Help:Templates#Parameters
    # Each parameter name becomes a key of %params; a "|default" part inside
    # the braces is not included in the name.
    $params{$1} = undef while $wikitext =~ /\{{3}(.+?)(\|.*?)?\}{3}/g;

    # Create %{param=index} hash
    # A parameter whose name equals a CSV heading gets that heading's column
    # index; parameters with no matching heading keep the value undef.
    for my $column (0 .. $#headings) {
        my $heading = $headings[$column];
        $params{$heading} = $column if exists $params{$heading};
    }

    if ($::debug) {
        print "\@headings: @headings\n";
        print "%params: @{[%params]}\n";
    }
}
else {
    # Without the template no parameter names are known, so say so instead of
    # carrying on silently.
    warn "Could not fetch Template:$::template - records will be written without parameters\n";
}
 
 
 
# Get batch size and current number (also later account for n-bots)
 
 
 
# todo: log batch start
 
 
# Process the records
# Every remaining CSV row becomes one {{template}} call that is written to
# the wiki page named by the prefix plus the row's title column.
my $record_number = 1;

RECORD:
while (my $row = <CSV>) {

    # Trim the row. Blank rows are skipped: after a failed match $1 would
    # still hold stale data and a bogus record would be written.
    my ($fields) = $row =~ /^\s*(.+?)\s*$/ or next RECORD;
    my @record = split /$::sep/, $fields;

    # Build the template call. Parameters are emitted in sorted order so the
    # generated wikitext is the same on every run. Parameters with no
    # matching CSV column have an undefined index and are left out (using
    # undef as an array index would silently pick up column 0).
    my $tmpl = "{{$::template\n";
    for my $param (sort keys %params) {
        my $column = $params{$param};
        next unless defined $column;

        # Short rows may not reach this column; write an empty value then.
        my $value = defined $record[$column] ? $record[$column] : '';
        $tmpl .= "|$param = $value\n";
    }
    $tmpl .= "}}";

    print "Processing record ".$record_number++."\n";

    # In debug mode show the first generated template and stop before
    # anything is written to the wiki.
    if ($::debug) {
        print "\$tmpl = $tmpl\n";
        die  "[\$::debug set exiting]\n";
    }

    # Update the record
    # The same title (prefix included) is used for reading and for writing.
    my $page_title = $::prefix . $record[$::title];

    if ($::append) {

        # NOTE(review): assumes wikiAppend() adds its text argument to the
        # end of the existing page - confirm against wiki.pl. Only the new
        # template is sent; sending the current page text as well would
        # duplicate the page content.
        $done = wikiAppend(
            $::wiki,
            $page_title,
            "\n$tmpl",
            "[[Template:$::template|$::template]] appended using csv2wiki.pl"
        );
    }
    else {

        # Replace an existing call of the template in place, otherwise add
        # the call at the end of the current page text.
        my $text = wikiRawPage($::wiki, $page_title, 0);
        $text = '' unless defined $text;

        # Escape the template name and require it to be the complete name,
        # so that "Record" neither acts as a regex nor matches "{{Records".
        my $name_re = quotemeta $::template;
        $text .= "\n$tmpl"
            unless $text =~ s/\{\{\s*$name_re\s*(?:\|.*?)?\}\}/$tmpl/is;

        $done = wikiEdit(
            $::wiki,
            $page_title,
            $text,
            "[[Template:$::template|$::template]] replacement using csv2wiki.pl"
        );
    }

    # log a row error if any
    # (not implemented yet; $done holds the result of the wiki call)
}

close CSV;
 

Revision as of 22:03, 15 December 2009

Info.svg This code is in our Git repository here.

Note: If there is no information in this page about this code and it's a MediaWiki extension, there may be something at mediawiki.org.