Difference between revisions of "Oo-regexp-export.php"

From Organic Design wiki
 
Line 7: Line 7:
 
$script = $GLOBALS['xwScript'];
 
$script = $GLOBALS['xwScript'];
 
$bToken = ':-:'.'bullet'.':-:';
 
$bToken = ':-:'.'bullet'.':-:';
 +
$hToken = ':-:'.'heading'.':-:';
  
# Convert problem quotes and dashes
+
# ;text (pre)
$text = str_replace('–','-',$text);
+
$text = preg_replace("/\\n;(.+?)\\n/m", "\n\n$hToken\\1\n\n", $text);
$text = preg_replace('/“|”/','"',$text);
 
 
 
# TODO ; :
 
  
 
# *text (pre)
 
# *text (pre)
Line 43: Line 41:
 
$xml .= "<text:p text:style-name=\"Text body\">$text</text:p>\n";
 
$xml .= "<text:p text:style-name=\"Text body\">$text</text:p>\n";
 
}
 
}
 +
 +
# ;text (post)
 +
$xml = preg_replace("/(?<=>)$hToken(.+)(?=<)/", "<text:span text:style-name=\"T2\">\\1</text:span>", $xml);
  
 
# *text (post)
 
# *text (post)
Line 62: Line 63:
 
$error = false;
 
$error = false;
  
# Create a unique working folder and copy OO doc file-template to new file in it
+
# Create a unique working folder to build zip in
 
mkdir($tmp = "$htdocs/wiki/tmp/".uniqid('export'));
 
mkdir($tmp = "$htdocs/wiki/tmp/".uniqid('export'));
copy("$htdocs/wiki/xmlwiki/doc-template.sxw", $sxw = "$tmp/sxw");
+
$sxw = "$tmp/sxw";
  
 
# Zip compression library - www.phpconcept.net/pclzip/
 
# Zip compression library - www.phpconcept.net/pclzip/
Line 70: Line 71:
 
require_once("$htdocs/wiki/xmlwiki/lib/pclzip/pclzip.lib.php");
 
require_once("$htdocs/wiki/xmlwiki/lib/pclzip/pclzip.lib.php");
  
# Get OO content template article
+
# Get OO content and styles template articles
$xml = xwArticleContent('oo-doc-template.xml');
+
$content = xwArticleContent('oo-doc-content.xml');
 +
$styles = xwArticleContent('oo-doc-styles.xml');
  
 
# Insert our document info into existing tokens
 
# Insert our document info into existing tokens
$xml = str_replace('Title', str_replace('_',' ',$title), $xml);
+
$content = str_replace('Title', str_replace('_',' ',$title), $content);
$xml = str_replace('Version', 'Exported by '.$GLOBALS['xwUserName'].' on '.date('d M Y').' at '.date('H:i:s'), $xml);
+
$content = str_replace('Version', 'Exported by '.$GLOBALS['xwUserName'].' on '.date('d M Y').' at '.date('H:i:s'), $content);
#$xml = str_replace('My-Header', "$script/$title", $xml); # this is in styles
+
$styles = str_replace('My-Header', "$script/$title", $styles);
  
# Build mian document body
+
# Convert problem characters (should make them work instead!)
 +
$article = str_replace('’',"'",$article);
 +
$article = str_replace('…','...',$article);
 +
$article = str_replace('–','-',$article);
 +
$article = preg_replace('/“|”/','"',$article);
 +
 
 +
# Build main document body
 
# - the match splits the article into sections at headings, extracting heading, level & text
 
# - the match splits the article into sections at headings, extracting heading, level & text
$body = ''; $last = 0;
+
$body = ''; $last = 0; $toclist = array();
 
if (preg_match_all('/<h1><tmp num="(#*)" title="(.*?)" anchor="(.*?)".><.h1>/s', $article, $sections, PREG_SET_ORDER | PREG_OFFSET_CAPTURE)) {
 
if (preg_match_all('/<h1><tmp num="(#*)" title="(.*?)" anchor="(.*?)".><.h1>/s', $article, $sections, PREG_SET_ORDER | PREG_OFFSET_CAPTURE)) {
 
foreach ($sections as $m) {
 
foreach ($sections as $m) {
Line 87: Line 95:
 
if ($i = $m[0][1] - $last) $body .= ooParse(substr($article, $last, $i));
 
if ($i = $m[0][1] - $last) $body .= ooParse(substr($article, $last, $i));
 
$style = $level > 1 ? "Heading $level" : 'P11';
 
$style = $level > 1 ? "Heading $level" : 'P11';
 +
if ($level && $level < 3) $toclist[] = array('P'.($level+7), $anchor);
 
if ($anchor) $body .= "<text:h text:style-name=\"$style\" text:level=\"$level\">$anchor</text:h>\n";
 
if ($anchor) $body .= "<text:h text:style-name=\"$style\" text:level=\"$level\">$anchor</text:h>\n";
 
$last = $m[0][1] + strlen($m[0][0]);
 
$last = $m[0][1] + strlen($m[0][0]);
Line 95: Line 104:
  
 
# Insert the constructed body into the xml template
 
# Insert the constructed body into the xml template
$xml = str_replace('</office:body>', "\n$body\n</office:body>", $xml);
+
$content = str_replace('</office:body>', "\n$body\n</office:body>", $content);
  
# Dump the template to content.xml file in the working folder
+
# Insert TOC msg into template
$fh = fopen($file = "$tmp/content.xml",'w+');
+
$toc = "<text:p text:style-name=\"P8\">Contents is out of date, please update fields from tools menu\n<text:tab-stop/>???</text:p>\n";
fwrite($fh, $xml);
+
$content = str_replace('</text:index-body>', "$toc\n</text:index-body>", $content);
 +
 
 +
# Dump the content & styles templates to files in the working folder
 +
$fh = fopen($file1 = "$tmp/content.xml",'w+');
 +
fwrite($fh, $content);
 +
fclose($fh);
 +
$fh = fopen($file2 = "$tmp/styles.xml",'w+');
 +
fwrite($fh, $styles);
 
fclose($fh);
 
fclose($fh);
  
# Add content.xml to the sxw archive...
+
# Add the files to the sxw archive...
 
$zip = new PclZip($sxw);
 
$zip = new PclZip($sxw);
if ($zip->add($file, PCLZIP_OPT_REMOVE_ALL_PATH)) {
+
if ($zip->create( array($file1, $file2), PCLZIP_OPT_REMOVE_ALL_PATH )) {
 
# ...and send as raw data back to browser as title.sxw
 
# ...and send as raw data back to browser as title.sxw
 
ob_end_clean();
 
ob_end_clean();
Line 111: Line 127:
 
@readfile($sxw);
 
@readfile($sxw);
 
# Clean up and die
 
# Clean up and die
@unlink("$tmp/content.xml");
+
@unlink($file1);
@unlink("$tmp/sxw");
+
@unlink($file2);
 +
@unlink($sxw);
 
@rmdir($tmp);
 
@rmdir($tmp);
 
die;
 
die;
Line 119: Line 136:
  
 
# Error, clean up files
 
# Error, clean up files
if ($error) xwMessage($error,'red');
+
xwMessage($error,'red');
if (!@unlink("$tmp/content.xml")) xwMessage("Couldn't remove $tmp/content.xml!",'red');
+
@unlink($file1);
if (!@unlink("$tmp/sxw")) xwMessage("Couldn't remove $tmp/sxw!",'red');
+
@unlink($file2);
if (!@rmdir($tmp)) xwMessage("Couldn't remove $tmp",'red');
+
@unlink($sxw);
 +
@rmdir($tmp);
 
?>
 
?>

Revision as of 21:27, 15 August 2005

<?

# Export article to OO format
# - only does document.php transform and its own wiki markup

# Main wiki->oo parser
# - takes a chunk of wiki-markup text and returns it as a string of
#   OpenOffice <text:p> paragraphs (one per blank-line-separated block)
# - reads the wiki script URL from $GLOBALS['xwScript'] to build internal links
# - ";heading" and "*bullet" lines are tagged with private tokens before the
#   paragraph split and turned into real markup afterwards
function ooParse($text) {
	$script = $GLOBALS['xwScript'];
	$bToken = ':-:'.'bullet'.':-:';
	$hToken = ':-:'.'heading'.':-:';

	# ;text (pre)
	# - isolate the line as its own paragraph and prefix it with the heading token
	$text = preg_replace("/\\n;(.+?)\\n/m", "\n\n$hToken\\1\n\n", $text);

	# *text (pre)
	# - wrap each bullet line in bullet tokens (the leading newline is consumed)
	$text = preg_replace("/\\n(\\*+) *(.+?)$/m", "$bToken\\1\\2$bToken", $text);

	$xml = '';
	# ENT_COMPAT is passed explicitly: it was the default before PHP 8.1, and it
	# leaves single quotes alone so the quote-based markup below can still match
	foreach (explode("\n\n", trim(htmlentities($text, ENT_COMPAT))) as $text) {

		# '''text'''
		# NOTE(review): the quote delimiters of this pattern and the next were
		# missing from the wiki-rendered listing (both appeared as "/(.+?)/s");
		# restored here as wiki bold/italic markers - confirm against the raw source
		$text = preg_replace("/'''(.+?)'''/s", "<text:span text:style-name=\"T2\">\\1</text:span>", $text);

		# ''text''
		$text = preg_replace("/''(.+?)''/s", "<text:span text:style-name=\"T1\">\\1</text:span>", $text);

		# [[Wikipedia:article|anchor]]
		$text = preg_replace("/\\[\\[Wikipedia:([^\\|\\]]+)\\|([^\\]]*)]]/", "<text:a xlink:type=\"simple\" xlink:href=\"http://en.wikipedia.org/wiki/\\1\">\\2</text:a>", $text);

		# [[article|anchor]]
		$text = preg_replace("/\\[\\[([^\\|\\]]+)\\|([^\\]]*)]]/", "<text:a xlink:type=\"simple\" xlink:href=\"$script/\\1\">\\2</text:a>", $text);

		# [[article]]
		$text = preg_replace("/\\[\\[([^\\]]+)]]/", "<text:a xlink:type=\"simple\" xlink:href=\"$script/\\1\">\\1</text:a>", $text);

		# [url anchor]
		$text = preg_replace("/\\[([^ \\]]+) +([^\\]]*)]/", "<text:a xlink:type=\"simple\" xlink:href=\"\\1\">\\2</text:a>", $text);

		# [url]
		$text = preg_replace("/\\[([^\\]]+)]/", "<text:a xlink:type=\"simple\" xlink:href=\"\\1\">\\1</text:a>", $text);

		$xml .= "<text:p text:style-name=\"Text body\">$text</text:p>\n";
	}

	# ;text (post)
	# - a heading token directly after a tag: wrap the rest of the paragraph in a T2 span
	$xml = preg_replace("/(?<=>)$hToken(.+)(?=<)/", "<text:span text:style-name=\"T2\">\\1</text:span>", $xml);

	# *text (post)
	# - turn each tokenised bullet into a list item, then open a list before the
	#   first item of a run and close it after the last, splitting the
	#   surrounding paragraph around the list
	$xml= preg_replace("/$bToken\\*+(.*?)$bToken/m", "<text:list-item><text:p text:style-name=\"P12\">\\1</text:p></text:list-item>", $xml);
	$xml= preg_replace("/(?<!<\\/text:list-item>)(<text:list-item>)/", "</text:p>\n<text:unordered-list text:style-name=\"L1\">\\1", $xml);
	$xml= preg_replace("/(<\\/text:list-item>)(?!<text:list-item>)/", "\\1</text:unordered-list>\n<text:p text:style-name=\"Text body\">", $xml);

	# Horizontal rule: replaced by an empty P13-styled paragraph
	# NOTE(review): the text has already been through htmlentities(), so a literal
	# "<hr>" from the article would arrive here entity-encoded; the wiki rendering
	# this listing came from may have decoded the search strings - verify them
	# against the raw source
	$xml = str_replace('<hr></text:p>', "</text:p>\n<text:p text:style-name=\"P13\" />\n", $xml);
	$xml = str_replace('<hr>', "</text:p>\n<text:p text:style-name=\"P13\" />\n<text:p text:style-name=\"Text body\">", $xml);

	#   $xml = str_replace('&nbsp;', ' ', $xml);

	return $xml;
}

# Export driver
# - fills the OO content/styles template articles with the parsed article,
#   zips them into an .sxw file in a temporary folder and streams that file
#   back to the browser, removing the temporary files afterwards
# NOTE(review): $title, $article and $script are read below but never assigned
# in this listing - presumably they come from the including scope; confirm
$htdocs = $GLOBALS['_SERVER']['DOCUMENT_ROOT'];
$error = false;

# Create a unique working folder to build zip in
mkdir($tmp = "$htdocs/wiki/tmp/".uniqid('export'));
$sxw = "$tmp/sxw";

# Zip compression library - www.phpconcept.net/pclzip/
# - the temporary dir constant is defined before the library is loaded
define('PCLZIP_TEMPORARY_DIR', "$tmp/");
require_once("$htdocs/wiki/xmlwiki/lib/pclzip/pclzip.lib.php");

# Get OO content and styles template articles
$content = xwArticleContent('oo-doc-content.xml');
$styles = xwArticleContent('oo-doc-styles.xml');

# Insert our document info into existing tokens
$content = str_replace('Title', str_replace('_',' ',$title), $content);
$content = str_replace('Version', 'Exported by '.$GLOBALS['xwUserName'].' on '.date('d M Y').' at '.date('H:i:s'), $content);
$styles = str_replace('My-Header', "$script/$title", $styles);

# Convert problem characters (should make them work instead!)
# NOTE(review): the second alternative of the last pattern was an unreadable
# replacement character in the listing; restored as the closing curly quote to
# pair with the opening one - confirm against the raw source
$article = str_replace('’',"'",$article);
$article = str_replace('…','...',$article);
$article = str_replace('–','-',$article);
$article = preg_replace('/“|”/','"',$article);

# Build main document body
# - the match splits the article into sections at headings, extracting heading, level & text
# - $toclist is collected here but not used anywhere else in this listing
$body = ''; $last = 0; $toclist = array();
if (preg_match_all('/<h1><tmp num="(#*)" title="(.*?)" anchor="(.*?)".><.h1>/s', $article, $sections, PREG_SET_ORDER | PREG_OFFSET_CAPTURE)) {
	foreach ($sections as $m) {
		# With PREG_OFFSET_CAPTURE each group is array(text, offset)
		$level = strlen($m[1][0]);
		$anchor = htmlentities($m[3][0]);
		# Parse whatever text lies between the previous heading and this one
		if ($i = $m[0][1] - $last) $body .= ooParse(substr($article, $last, $i));
		$style = $level > 1 ? "Heading $level" : 'P11';
		if ($level && $level < 3) $toclist[] = array('P'.($level+7), $anchor);
		if ($anchor) $body .= "<text:h text:style-name=\"$style\" text:level=\"$level\">$anchor</text:h>\n";
		$last = $m[0][1] + strlen($m[0][0]);
	}
}

# Handle last section (or all if there were no headings)
if ($i = strlen($article) - $last) $body .= ooParse(substr($article, $last, $i));

# Insert the constructed body into the xml template
$content = str_replace('</office:body>', "\n$body\n</office:body>", $content);

# Insert TOC msg into template
$toc = "<text:p text:style-name=\"P8\">Contents is out of date, please update fields from tools menu\n<text:tab-stop/>???</text:p>\n";
$content = str_replace('</text:index-body>', "$toc\n</text:index-body>", $content);

# Dump the content & styles templates to files in the working folder
$fh = fopen($file1 = "$tmp/content.xml",'w+');
fwrite($fh, $content);
fclose($fh);
$fh = fopen($file2 = "$tmp/styles.xml",'w+');
fwrite($fh, $styles);
fclose($fh);

# Add the files to the sxw archive...
$zip = new PclZip($sxw);
if ($zip->create( array($file1, $file2), PCLZIP_OPT_REMOVE_ALL_PATH )) {

	# ...and send as raw data back to browser as title.sxw
	ob_end_clean();
	header('Content-type: application/zip');
	header("Content-Disposition: attachment; filename=\"$title.sxw\"");
	@readfile($sxw);

	# Clean up and die
	@unlink($file1);
	@unlink($file2);
	@unlink($sxw);
	@rmdir($tmp);
	die;
}
else $error = $zip->errorInfo(true);

# Error, clean up files
# - only reached when the archive could not be created (the success path dies above)
xwMessage($error,'red');
@unlink($file1);
@unlink($file2);
@unlink($sxw);
@rmdir($tmp);
?>