Difference between revisions of "Oo-regexp-export.php"

From Organic Design wiki
({{legacy}})
 
(3 intermediate revisions by the same user not shown)
Line 1: Line 1:
<?
+
{{legacy}}
 +
<php><?
 
# Export article to OO format
 
# Export article to OO format
 
# - only does document.php transform and its own wiki markup
 
# - only does document.php transform and its own wiki markup
 +
 +
# Returns an OO-span element from a CSS-style-span
# - $matches is a preg match array, $matches[1] holds the content of the span's style attribute
# - returns the opening <text:span> tag, referring to an automatic text style named "T<n>"
# - every distinct set of OO properties is registered once in $GLOBALS['oo-styles'] (properties => n)
#   and in $GLOBALS['oo-style-indexes'] (n => properties), with n counting up from 5
function replace_span($matches) {
	# Make sure both registries exist so that count() below always receives an array
	if (!isset($GLOBALS['oo-styles'])) $GLOBALS['oo-styles'] = array();
	if (!isset($GLOBALS['oo-style-indexes'])) $GLOBALS['oo-style-indexes'] = array();

	$oo = '';
	# Strip the whitespace surrounding the ':' and ';' separators
	$css = preg_replace("/\\s*([:;])\\s*/m",'$1',$matches[1]);
	foreach (explode(';', $css) as $style) {
		# A trailing ';' yields an empty declaration and a stray word has no ':' - skip both
		# (limit of 2 keeps any further ':' inside the value)
		$parts = explode(':', $style, 2);
		if (count($parts) < 2) continue;
		# Property names are case-insensitive and the first one may still carry leading whitespace
		$att = strtolower(trim($parts[0]));
		$val = trim($parts[1]);
		if ($val === '') continue;
		if ($att == 'color') $oo .= " fo:color=\"$val\"";
		# "normal" (or its numeric form 400) explicitly asks for regular weight, so it must not become bold
		if ($att == 'font-weight' && !in_array(strtolower($val), array('normal', '400')))
			$oo .= ' fo:font-weight="bold" style:font-weight-asian="bold" style:font-weight-complex="bold"';
		# likewise "font-style:normal" must not become italic
		if ($att == 'font-style' && strtolower($val) != 'normal')
			$oo .= ' fo:font-style="italic" style:font-style-asian="italic" style:font-style-complex="italic"';
	}
	$oo = "<style:properties $oo/>";

	# Re-use the index of an identical property set, otherwise register a new one
	if (isset($GLOBALS['oo-styles'][$oo])) $tN = $GLOBALS['oo-styles'][$oo];
	else {
		$tN = $GLOBALS['oo-styles'][$oo] = 5+count($GLOBALS['oo-styles']);
		$GLOBALS['oo-style-indexes'][$tN] = $oo;
	}
	return "<text:span text:style-name=\"T$tN\">";
}
 +
 +
# Turns a captured code section into OO paragraphs
# - $matches is a preg match array, $matches[1] holds the code with its lines separated by <br/> tags
# - each line becomes its own "Code" styled paragraph, and html spans become OO text spans
# - the result first closes the paragraph it is inserted into and finally re-opens a "Text body" one
function replace_code($matches) {
	$paragraphs = array();
	$lines = preg_split('/<br *\\/>/', $matches[1]);
	foreach ($lines as $line) {
		$paragraphs[] = "<text:p text:style-name=\"Code\">$line</text:p>\n";
	}
	$code = implode('', $paragraphs);

	# Opening span tags go through replace_span so that their CSS gets registered as an OO style
	$code = preg_replace_callback('/<span style="(.*?)">/', 'replace_span', $code);
	$code = preg_replace('/<\\/span>/', '</text:span>', $code);

	return "</text:p>\n$code\n<text:p text:style-name=\"Text body\">\n";
}
  
 
# Main wiki->oo parser
 
# Main wiki->oo parser
 
function ooParse($text) {
 
function ooParse($text) {
 
$script = $GLOBALS['xwScript'];
 
$script = $GLOBALS['xwScript'];
$bToken = ':-:'.'bullet'.':-:';
+
for ($i = 0; $i < 10; $i++) {
$hToken = ':-:'.'heading'.':-:';
+
$token = "token$i";
 
+
$$token = ":-:$token:-:";
 +
}
 
# ;text (pre)
 
# ;text (pre)
$text = preg_replace("/\\n;(.+?)\\n/m", "\n\n$hToken\\1\n\n", $text);
+
$text = preg_replace("/\\n;(.+?)\\n/m", "\n\n$token2\\1\n\n", $text);
 
 
 
# *text (pre)
 
# *text (pre)
$text = preg_replace("/\\n(\\*+) *(.+?)$/m", "$bToken\\1\\2$bToken", $text);
+
$text = preg_replace("/\\n(\\*+) *(.+?)$/m", "$token1\\1\\2$token1", $text);
 
+
# <pre> (pre)
 +
$text = preg_replace("/<pre>(.+?)<\\/pre>/es", '"\n\nPR$token3".htmlentities("$1")."\n\n"', $text);
 +
# no-wiki (pre)
 +
$text = preg_replace("/<[n]owiki>(.+?)<\\/nowiki>/s", "\n\nNW$token3$1\n\n", $text);
 +
# xwcode (pre)
 +
$text = preg_replace('/<!-- xwcode-start --><div class="xwcode">(.*?)<.div><!-- xwcode-end -->/s', "\n\nXW$token3$1\n\n", $text);
 
$xml = '';
 
$xml = '';
foreach (explode("\n\n", trim(htmlentities($text))) as $text) {
+
foreach (explode("\n\n", trim($text)) as $text) {
 
+
if (!ereg($token3, $text)) {
# '''text'''
+
$text = htmlentities($text);
$text = preg_replace("/'''(.+?)'''/s", "<text:span text:style-name=\"T2\">\\1</text:span>", $text);
+
# '''text'''
+
$text = preg_replace("/'''(.+?)'''/s", "<text:span text:style-name=\"T2\">\\1</text:span>", $text);
# ''text''
+
# ''text''
$text = preg_replace("/''(.+?)''/s", "<text:span text:style-name=\"T1\">\\1</text:span>", $text);
+
$text = preg_replace("/''(.+?)''/s", "<text:span text:style-name=\"T1\">\\1</text:span>", $text);
+
# [[Wikipedia:article|anchor]]
# [[Wikipedia:article|anchor]]
+
$text = preg_replace("/\\[\\[Wikipedia:([^\\|\\]]+)\\|([^\\]]*)]]/", "<text:a xlink:type=\"simple\" xlink:href=\"http://en.wikipedia.org/wiki/\\1\">\\2</text:a>", $text);
$text = preg_replace("/\\[\\[Wikipedia:([^\\|\\]]+)\\|([^\\]]*)]]/", "<text:a xlink:type=\"simple\" xlink:href=\"http://en.wikipedia.org/wiki/\\1\">\\2</text:a>", $text);
+
# [[article|anchor]]
+
$text = preg_replace("/\\[\\[([^\\|\\]]+)\\|([^\\]]*)]]/", "<text:a xlink:type=\"simple\" xlink:href=\"$script/\\1\">\\2</text:a>", $text);
# [[article|anchor]]
+
# [[article]]
$text = preg_replace("/\\[\\[([^\\|\\]]+)\\|([^\\]]*)]]/", "<text:a xlink:type=\"simple\" xlink:href=\"$script/\\1\">\\2</text:a>", $text);
+
$text = preg_replace("/\\[\\[([^\\]]+)]]/", "<text:a xlink:type=\"simple\" xlink:href=\"$script/\\1\">\\1</text:a>", $text);
+
# [url anchor]
# [[article]]
+
$text = preg_replace("/\\[([^ \\]]+) +([^\\]]*)]/", "<text:a xlink:type=\"simple\" xlink:href=\"\\1\">\\2</text:a>", $text);
$text = preg_replace("/\\[\\[([^\\]]+)]]/", "<text:a xlink:type=\"simple\" xlink:href=\"$script/\\1\">\\1</text:a>", $text);
+
# [url]
+
$text = preg_replace("/\\[([^\\]]+)]/", "<text:a xlink:type=\"simple\" xlink:href=\"\\1\">\\1</text:a>", $text);
# [url anchor]
+
# <hr> (pre)
$text = preg_replace("/\\[([^ \\]]+) +([^\\]]*)]/", "<text:a xlink:type=\"simple\" xlink:href=\"\\1\">\\2</text:a>", $text);
+
$text = str_replace('&lt;hr&gt;', $token4, $text);
+
}
# [url]
 
$text = preg_replace("/\\[([^\\]]+)]/", "<text:a xlink:type=\"simple\" xlink:href=\"\\1\">\\1</text:a>", $text);
 
 
 
$xml .= "<text:p text:style-name=\"Text body\">$text</text:p>\n";
 
$xml .= "<text:p text:style-name=\"Text body\">$text</text:p>\n";
 
}
 
}
 
 
# ;text (post)
 
# ;text (post)
$xml = preg_replace("/(?<=>)$hToken(.+)(?=<)/", "<text:span text:style-name=\"T2\">\\1</text:span>", $xml);
+
$xml = preg_replace("/(?<=>)$token2(.+)(?=<)/", "<text:span text:style-name=\"T2\">\\1</text:span>", $xml);
 
 
 
# *text (post)
 
# *text (post)
$xml= preg_replace("/$bToken\\*+(.*?)$bToken/m", "<text:list-item><text:p text:style-name=\"P12\">\\1</text:p></text:list-item>", $xml);
+
$xml= preg_replace("/$token1\\*+(.*?)$token1/m", "<text:list-item><text:p text:style-name=\"P12\">\\1</text:p></text:list-item>", $xml);
 
$xml= preg_replace("/(?<!<\\/text:list-item>)(<text:list-item>)/", "</text:p>\n<text:unordered-list text:style-name=\"L1\">\\1", $xml);
 
$xml= preg_replace("/(?<!<\\/text:list-item>)(<text:list-item>)/", "</text:p>\n<text:unordered-list text:style-name=\"L1\">\\1", $xml);
 
$xml= preg_replace("/(<\\/text:list-item>)(?!<text:list-item>)/", "\\1</text:unordered-list>\n<text:p text:style-name=\"Text body\">", $xml);
 
$xml= preg_replace("/(<\\/text:list-item>)(?!<text:list-item>)/", "\\1</text:unordered-list>\n<text:p text:style-name=\"Text body\">", $xml);
 
+
# no-wiki (post)
# <hr>
+
$xml = str_replace("NW$token3",'', $xml);
$xml = str_replace('&lt;hr&gt;</text:p>', "</text:p>\n<text:p text:style-name=\"P13\" />\n", $xml);
+
# <hr> (post)
$xml = str_replace('&lt;hr&gt;', "</text:p>\n<text:p text:style-name=\"P13\" />\n<text:p text:style-name=\"Text body\">", $xml);
+
$xml = str_replace("$token4</text:p>", "</text:p>\n<text:p text:style-name=\"P13\" />\n", $xml);
 
+
$xml = str_replace($token4, "</text:p>\n<text:p text:style-name=\"P13\" />\n<text:p text:style-name=\"Text body\">", $xml);
# &nbsp;
+
# spaces
 
$xml = str_replace('&amp;nbsp;', '&nbsp;', $xml);
 
$xml = str_replace('&amp;nbsp;', '&nbsp;', $xml);
 
+
$xml = str_replace('&nbsp; ', '&nbsp;&nbsp;', $xml);
 +
$xml = preg_replace("/((&nbsp;)+)/e",'"<text:s text:c=\"".(strlen("$1")/6)."\"/>"',$xml);
 +
# tables (just remove for now)
 +
$xml = preg_replace("/&lt;\\/?(center|table|tr|th|td).*?&gt;/i",'',$xml);
 +
# convert xwcode and syntax colours
 +
$xml = preg_replace_callback("/..$token3(.+?)(?=<\\/text:p>)/s", 'replace_code', $xml);
 
return $xml;
 
return $xml;
 
}
 
}
  
 
$htdocs = $GLOBALS['_SERVER']['DOCUMENT_ROOT'];
 
$htdocs = $GLOBALS['_SERVER']['DOCUMENT_ROOT'];
 +
$GLOBALS['oo-styles'] = array();
 +
$GLOBALS['oo-style-indexes'] = array();
 
$error = false;
 
$error = false;
  
Line 74: Line 109:
 
$content = xwArticleContent('oo-doc-content.xml');
 
$content = xwArticleContent('oo-doc-content.xml');
 
$styles = xwArticleContent('oo-doc-styles.xml');
 
$styles = xwArticleContent('oo-doc-styles.xml');
 +
$manifest = xwArticleContent('oo-doc-manifest.xml');
  
 
# Insert our document info into existing tokens
 
# Insert our document info into existing tokens
Line 80: Line 116:
 
$styles = str_replace('My-Header', "$script/$title", $styles);
 
$styles = str_replace('My-Header', "$script/$title", $styles);
  
# Convert problem characters (should make them work instead!)
+
# Preserve problem characters so they don't get converted to entities
$article = str_replace('’',"'",$article);
+
$cToken = '::--::';
$article = str_replace('…','...',$article);
+
$specialChrs = array('…', '–', '‘', '’', '“', '�');
$article = str_replace('–','-',$article);
+
foreach ($specialChrs as $i => $c) $article = str_replace($c, "$i$cToken", $article);
$article = preg_replace('/“|�/','"',$article);
 
  
 
# Build main document body
 
# Build main document body
Line 105: Line 140:
 
# Insert the constructed body into the xml template
 
# Insert the constructed body into the xml template
 
$content = str_replace('</office:body>', "\n$body\n</office:body>", $content);
 
$content = str_replace('</office:body>', "\n$body\n</office:body>", $content);
 +
 +
# Recover the special characters
 +
foreach ($specialChrs as $i => $c) $content = str_replace("$i$cToken", $c, $content);
 +
 +
# Insert OO'd CSS styles
 +
$ooStyles = '';
 +
foreach ($GLOBALS['oo-styles'] as $style => $i) $ooStyles .= "<style:style style:name=\"T$i\" style:family=\"text\">\n$style\n</style:style>\n";
 +
$content = str_replace('</office:automatic-styles>', "$ooStyles\n</office:automatic-styles>", $content);
  
 
# Insert TOC msg into template
 
# Insert TOC msg into template
Line 116: Line 159:
 
$fh = fopen($file2 = "$tmp/styles.xml",'w+');
 
$fh = fopen($file2 = "$tmp/styles.xml",'w+');
 
fwrite($fh, $styles);
 
fwrite($fh, $styles);
 +
fclose($fh);
 +
$fh = fopen($file3 = "$tmp/manifest.xml",'w+');
 +
fwrite($fh, $manifest);
 
fclose($fh);
 
fclose($fh);
  
 
# Add the files to the sxw archive...
 
# Add the files to the sxw archive...
 
$zip = new PclZip($sxw);
 
$zip = new PclZip($sxw);
if ($zip->create( array($file1, $file2), PCLZIP_OPT_REMOVE_ALL_PATH )) {
+
if ($zip->create(array($file1, $file2),PCLZIP_OPT_REMOVE_ALL_PATH)) {
 +
$zip->add($file3,PCLZIP_OPT_REMOVE_ALL_PATH,PCLZIP_OPT_ADD_PATH,'META-INF');
 
# ...and send as raw data back to browser as title.sxw
 
# ...and send as raw data back to browser as title.sxw
 
ob_end_clean();
 
ob_end_clean();
Line 129: Line 176:
 
@unlink($file1);
 
@unlink($file1);
 
@unlink($file2);
 
@unlink($file2);
 +
@unlink($file3);
 
@unlink($sxw);
 
@unlink($sxw);
 
@rmdir($tmp);
 
@rmdir($tmp);
Line 141: Line 189:
 
@unlink($sxw);
 
@unlink($sxw);
 
@rmdir($tmp);
 
@rmdir($tmp);
?>
+
?></php>

Latest revision as of 10:00, 22 September 2007

Legacy.svg Legacy: This article describes a concept that has been superseded in the course of ongoing development on the Organic Design wiki. Please do not develop it any further or base work on it; it is only useful as a historic record of work done. You may find a link to the currently used concept or function in this article; if not, you can contact the author to find out what has taken the place of this legacy item.

<php><?

# Export article to OO format
# - only does document.php transform and its own wiki markup

# Returns an OO-span element from a CSS-style-span function replace_span($matches) { $oo = ; $css = preg_replace("/\\s*([:;])\\s*/m",'$1',$matches[1]); foreach (explode(';', $css) as $style) { list($att, $val) = explode(':', $style); if ($att == 'color') $oo .= " fo:color=\"$val\""; if ($att == 'font-weight') $oo .= ' fo:font-weight="bold" style:font-weight-asian="bold" style:font-weight-complex="bold"'; if ($att == 'font-style') $oo .= ' fo:font-style="italic" style:font-style-asian="italic" style:font-style-complex="italic"'; } $oo = "<style:properties $oo/>"; if (isset($GLOBALS['oo-styles'][$oo])) $tN = $GLOBALS['oo-styles'][$oo]; else { $tN = $GLOBALS['oo-styles'][$oo] = 5+count($GLOBALS['oo-styles']); $GLOBALS['oo-style-indexes'][$tN] = $oo; } return "<text:span text:style-name=\"T$tN\">"; }

function replace_code($matches) { $text = ; foreach (preg_split('/
/', $matches[1]) as $line) $text .= "<text:p text:style-name=\"Code\">$line</text:p>\n"; $text = preg_replace_callback('//', 'replace_span', $text); $text = preg_replace('/<\\/span>/', '</text:span>', $text); return "</text:p>\n$text\n<text:p text:style-name=\"Text body\">\n"; }

# Main wiki->oo parser function ooParse($text) { $script = $GLOBALS['xwScript']; for ($i = 0; $i < 10; $i++) { $token = "token$i"; $$token = ":-:$token:-:"; } # ;text (pre) $text = preg_replace("/\\n;(.+?)\\n/m", "\n\n$token2\\1\n\n", $text); # *text (pre) $text = preg_replace("/\\n(\\*+) *(.+?)$/m", "$token1\\1\\2$token1", $text);

#

 (pre)
		$text = preg_replace("/
(.+?)<\\/pre>/es", '"\n\nPR$token3".htmlentities("$1")."\n\n"', $text);
		# no-wiki (pre)
		$text = preg_replace("/<[n]owiki>(.+?)<\\/nowiki>/s", "\n\nNW$token3$1\n\n", $text);
		# xwcode (pre)
		$text = preg_replace('/
(.*?)<.div>/s', "\n\nXW$token3$1\n\n", $text); $xml = ; foreach (explode("\n\n", trim($text)) as $text) { if (!ereg($token3, $text)) { $text = htmlentities($text); # text $text = preg_replace("/(.+?)/s", "<text:span text:style-name=\"T2\">\\1</text:span>", $text); # text $text = preg_replace("/(.+?)/s", "<text:span text:style-name=\"T1\">\\1</text:span>", $text); # anchor $text = preg_replace("/\\[\\[Wikipedia:([^\\|\\]]+)\\|([^\\]]*)]]/", "<text:a xlink:type=\"simple\" xlink:href=\"http://en.wikipedia.org/wiki/\\1\">\\2</text:a>", $text); # anchor $text = preg_replace("/\\[\\[([^\\|\\]]+)\\|([^\\]]*)]]/", "<text:a xlink:type=\"simple\" xlink:href=\"$script/\\1\">\\2</text:a>", $text); # article $text = preg_replace("/\\[\\[([^\\]]+)]]/", "<text:a xlink:type=\"simple\" xlink:href=\"$script/\\1\">\\1</text:a>", $text); # [url anchor] $text = preg_replace("/\\[([^ \\]]+) +([^\\]]*)]/", "<text:a xlink:type=\"simple\" xlink:href=\"\\1\">\\2</text:a>", $text); # [url] $text = preg_replace("/\\[([^\\]]+)]/", "<text:a xlink:type=\"simple\" xlink:href=\"\\1\">\\1</text:a>", $text); #
(pre) $text = str_replace('<hr>', $token4, $text); } $xml .= "<text:p text:style-name=\"Text body\">$text</text:p>\n"; } # ;text (post) $xml = preg_replace("/(?<=>)$token2(.+)(?=<)/", "<text:span text:style-name=\"T2\">\\1</text:span>", $xml); # *text (post) $xml= preg_replace("/$token1\\*+(.*?)$token1/m", "<text:list-item><text:p text:style-name=\"P12\">\\1</text:p></text:list-item>", $xml); $xml= preg_replace("/(?<!<\\/text:list-item>)(<text:list-item>)/", "</text:p>\n<text:unordered-list text:style-name=\"L1\">\\1", $xml); $xml= preg_replace("/(<\\/text:list-item>)(?!<text:list-item>)/", "\\1</text:unordered-list>\n<text:p text:style-name=\"Text body\">", $xml); # no-wiki (post) $xml = str_replace("NW$token3",, $xml); #
(post) $xml = str_replace("$token4</text:p>", "</text:p>\n<text:p text:style-name=\"P13\" />\n", $xml); $xml = str_replace($token4, "</text:p>\n<text:p text:style-name=\"P13\" />\n<text:p text:style-name=\"Text body\">", $xml); # spaces $xml = str_replace('&nbsp;', ' ', $xml); $xml = str_replace('  ', '  ', $xml); $xml = preg_replace("/(( )+)/e",'"<text:s text:c=\"".(strlen("$1")/6)."\"/>"',$xml); # tables (just remove for now) $xml = preg_replace("/<\\/?(center|table|tr|th|td).*?>/i",,$xml); # convert xwcode and syntax colours $xml = preg_replace_callback("/..$token3(.+?)(?=<\\/text:p>)/s", 'replace_code', $xml); return $xml; } $htdocs = $GLOBALS['_SERVER']['DOCUMENT_ROOT']; $GLOBALS['oo-styles'] = array(); $GLOBALS['oo-style-indexes'] = array(); $error = false; # Create a unique working folder to build zip in mkdir($tmp = "$htdocs/wiki/tmp/".uniqid('export')); $sxw = "$tmp/sxw"; # Zip compression library - www.phpconcept.net/pclzip/ define('PCLZIP_TEMPORARY_DIR', "$tmp/"); require_once("$htdocs/wiki/xmlwiki/lib/pclzip/pclzip.lib.php"); # Get OO content and styles template articles $content = xwArticleContent('oo-doc-content.xml'); $styles = xwArticleContent('oo-doc-styles.xml'); $manifest = xwArticleContent('oo-doc-manifest.xml'); # Insert our document info into existing tokens $content = str_replace('Title', str_replace('_',' ',$title), $content); $content = str_replace('Version', 'Exported by '.$GLOBALS['xwUserName'].' on '.date('d M Y').' at '.date('H:i:s'), $content); $styles = str_replace('My-Header', "$script/$title", $styles); # Preserve problem characters so they don't get converted to entities $cToken = '::--::'; $specialChrs = array('…', '–', '‘', '’', '“', 'â€?'); foreach ($specialChrs as $i => $c) $article = str_replace($c, "$i$cToken", $article); # Build main document body # - the match splits the article into sections at headings, extracting heading, level & text $body = ; $last = 0; $toclist = array(); if (preg_match_all('/

<tmp num="(#*)" title="(.*?)" anchor="(.*?)".><.h1>/s', $article, $sections, PREG_SET_ORDER | PREG_OFFSET_CAPTURE)) { foreach ($sections as $m) { $level = strlen($m[1][0]); $anchor = htmlentities($m[3][0]); if ($i = $m[0][1] - $last) $body .= ooParse(substr($article, $last, $i)); $style = $level > 1 ? "Heading $level" : 'P11'; if ($level && $level < 3) $toclist[] = array('P'.($level+7), $anchor); if ($anchor) $body .= "<text:h text:style-name=\"$style\" text:level=\"$level\">$anchor</text:h>\n"; $last = $m[0][1] + strlen($m[0][0]); } } # Handle last section (or all if there were no headings if ($i = strlen($article) - $last) $body .= ooParse(substr($article, $last, $i)); # Insert the constructed body into the xml template $content = str_replace('</office:body>', "\n$body\n</office:body>", $content); # Recover the special characters foreach ($specialChrs as $i => $c) $content = str_replace("$i$cToken", $c, $content); # Insert OO'd CSS styles $ooStyles = ; foreach ($GLOBALS['oo-styles'] as $style => $i) $ooStyles .= "<style:style style:name=\"T$i\" style:family=\"text\">\n$style\n</style:style>\n"; $content = str_replace('</office:automatic-styles>', "$ooStyles\n</office:automatic-styles>", $content); # Insert TOC msg into template $toc = "<text:p text:style-name=\"P8\">Contents is out of date, please update fields from tools menu\n<text:tab-stop/>???</text:p>\n"; $content = str_replace('</text:index-body>', "$toc\n</text:index-body>", $content); # Dump the content & styles templates to files in the working folder $fh = fopen($file1 = "$tmp/content.xml",'w+'); fwrite($fh, $content); fclose($fh); $fh = fopen($file2 = "$tmp/styles.xml",'w+'); fwrite($fh, $styles); fclose($fh); $fh = fopen($file3 = "$tmp/manifest.xml",'w+'); fwrite($fh, $manifest); fclose($fh); # Add the files to the sxw archive... 
$zip = new PclZip($sxw); if ($zip->create(array($file1, $file2),PCLZIP_OPT_REMOVE_ALL_PATH)) { $zip->add($file3,PCLZIP_OPT_REMOVE_ALL_PATH,PCLZIP_OPT_ADD_PATH,'META-INF'); # ...and send as raw data back to browser as title.sxw ob_end_clean(); header('Content-type: application/zip'); header("Content-Disposition: attachment; filename=\"$title.sxw\""); @readfile($sxw); # Clean up and die @unlink($file1); @unlink($file2); @unlink($file3); @unlink($sxw); @rmdir($tmp); die; } else $error = $zip->errorInfo(true); # Error, clean up files xwMessage($error,'red'); @unlink($file1); @unlink($file2); @unlink($sxw); @rmdir($tmp); ?></php>