Convert Glasnost documents to HTML

If you have the same problem we have — most of your technical documentation has been entered into Glasnost, but you want to move to Apache 2, which Glasnost doesn’t support — then you are probably looking for a way to convert these Glasnost articles to HTML so that you can insert them into some other tool.

This is a work in progress. We wanted to write a one- or two-part script that extracts articles from Glasnost, converts them to HTML and imports them into Drupal 6, but we have been unable to do so (so far) because of the inadequacy of the Drupal solution (we looked into modules to manage access permissions but they’re impractical, so we are looking for another solution, probably altering the “node permission by role” module to give correct default access values to existing pages/blocks).

Anyway, Frédéric Peters, an acquaintance from the BxLUG, gave us a Python script to work on, which I have slightly modified to add more information to the HTML output, and here it is. It is licensed under the GNU GPL v2+, so feel free to use it.

#! /usr/bin/env python

# Directory from which the pickled server state (`PageNamesServer.pickle`,
# `ArticlesServer.pickle`) and the raw article bodies (`articles/<id>`) are
# read. Adjust to match the Glasnost installation being exported.
DIR = '/var/lib/glasnost/localhost'

import os
import re
import sys
import cPickle
import pprint
import urlparse

# Extend the module search path so that the `glasnost.*` imports that follow
# can be resolved.
sys.path.append('/usr/lib/glasnost/')

import glasnost.common.parsers as parsers
import glasnost.common.context as context

# Push the Glasnost context this script runs under before any parser is
# called.
context.push(
    applicationId='glasnost://scripts/whatever',
    dispatcherId='glasnost://localhost',
    userToken=None,  # Set later.
)

# Create the output tree relative to the current working directory:
# `out/` receives pages that have an alias, `out/articles/` receives pages
# that are only known by their numeric article id.
if not os.path.exists('out'):
    os.mkdir('out')
if not os.path.exists('out/articles'):
    os.mkdir('out/articles')

# 1st, get aliases

class AdminPageNames:
    """Empty stand-in class with no attributes or methods of its own.

    NOTE(review): presumably defined so that cPickle can resolve this class
    name while loading `PageNamesServer.pickle` -- confirm against the
    pickle contents.
    """
class PageName:
    """Empty stand-in class with no attributes or methods of its own.

    NOTE(review): presumably the type of the unpickled alias records whose
    `name` and `mappedId` attributes are read by this script -- confirm
    against the pickle contents.
    """

# aliases:  alias name -> id of the object the alias points to
# raliases: object id  -> alias name (reverse lookup, used to pick the
#           output file name of an article)
aliases = {}
raliases = {}

# NOTE(security): unpickling can execute arbitrary code. Only run this
# script against pickle files from a Glasnost installation you trust.
fd = open(os.path.join(DIR, 'PageNamesServer.pickle'), 'rb')
try:
    # The first line is skipped before unpickling.
    # NOTE(review): presumably a header line written by Glasnost -- confirm.
    fd.readline()
    t = cPickle.load(fd)
finally:
    # Close the handle even when the pickle cannot be loaded.
    fd.close()

for v in t.objects.values():
    aliases[v.name] = v.mappedId
    raliases[v.mappedId] = v.name

# 2nd, get articles

class AdminArticles:
    """Empty stand-in class with no attributes or methods of its own.

    NOTE(review): presumably defined so that cPickle can resolve this class
    name while loading `ArticlesServer.pickle` -- confirm against the
    pickle contents.
    """
class Article:
    """Empty stand-in class with no attributes or methods of its own.

    NOTE(review): presumably the type of the unpickled article records
    whose `id`, `title` and other attributes are read by this script --
    confirm against the pickle contents.
    """

# Load the article index. After this block `t.objects` holds the article
# records that the rest of the script iterates over.
# NOTE(security): unpickling can execute arbitrary code. Only run this
# script against pickle files from a Glasnost installation you trust.
fd = open(os.path.join(DIR, 'ArticlesServer.pickle'), 'rb')
try:
    # The first line is skipped before unpickling.
    # NOTE(review): presumably a header line written by Glasnost -- confirm.
    fd.readline()
    t = cPickle.load(fd)
finally:
    # Close the handle even when the pickle cannot be loaded.
    fd.close()

def aliasrepl(m):
    """Return the replacement text for an alias marker.

    `m` is a regex match object whose full match is a marker such as
    ``[{glasnost:alias:name}]``. The two delimiter characters at each end
    are stripped and the last colon-separated field is returned.
    """
    return m.group(0)[2:-2].split(':')[-1]

def article_id_repl(m):
    """Return a relative link target for an article-id marker.

    `m` is a regex match object whose full match is a marker such as
    ``[{glasnost:partialid:localhost:articles:12}]``. The last
    colon-separated field (the article id) is turned into ``./<id>.html``.
    """
    return './' + m.group(0)[2:-2].split(':')[-1] + '.html'

def article_title_repl(m):
    """Return the title of the article referenced by a label marker.

    `m` is a regex match object whose full match is a marker such as
    ``[{glasnost:label:localhost:articles:12}]``. The last colon-separated
    field is looked up in the module-level `articles_titles` mapping.

    An unknown id yields an empty string, so the marker is simply removed
    from the text. This is the same substitution result `re.sub` produced
    when the lookup returned None, now stated explicitly.
    """
    # Named `article_id` rather than `id` to avoid shadowing the builtin.
    article_id = m.group(0)[2:-2].split(':')[-1]
    return articles_titles.get(article_id, '')

# Map the last path component of each article id to the article title;
# `article_title_repl` reads this mapping to resolve label markers.
articles_titles = {}
for v in t.objects.values():
    articles_titles[v.id.split('/')[-1]] = v.title

# Markers found in the HTML produced by the Glasnost parsers. The literal
# "[{" and "}]" delimiters must be escaped: unescaped, "[...]" is a regex
# character class that matches a single character, while the replacement
# callbacks strip two delimiter characters from each end of the match.
_ALIAS_RE = re.compile(r'\[\{glasnost:alias:.*?\}\]')
_ALIAS_LABEL_RE = re.compile(r'\[\{glasnost:aliaslabel:.*?\}\]')
_ARTICLE_ID_RE = re.compile(r'\[\{glasnost:partialid:localhost:articles:.*?\}\]')
_ARTICLE_LABEL_RE = re.compile(r'\[\{glasnost:label:localhost:articles:.*?\}\]')

# An article whose first reader entry is this group is exported as 'public'.
_PUBLIC_GROUP = 'glasnost://system/groups/1'

# Page skeleton; the fields are, in order: language, title, access level,
# creation time, edition time, original Glasnost id, title again (heading)
# and the converted body.
_PAGE_TEMPLATE = '''<html lang="%s">
<head>
<title>%s</title>
<meta access="%s" creationtime="%s" editiontime="%s" origid="%s" />
</head>
<body>
<h1>%s</h1>
%s
</body>
</html>
'''

# Convert every article to a standalone HTML file below `out/`.
for v in t.objects.values():
    title = v.title
    article_lang = v.__dict__.get('language')

    # Only the first entry of readersSet is inspected. A missing or empty
    # readersSet falls back to 'private' (the restrictive choice) instead
    # of crashing on the [0] lookup.
    # NOTE(review): confirm that checking only the first entry is intended.
    readers = v.__dict__.get('readersSet') or []
    if readers and readers[0] == _PUBLIC_GROUP:
        perms = 'public'
    else:
        perms = 'private'

    createtms = v.__dict__.get('creationTime')
    edittms = v.__dict__.get('editionTime')
    origid = v.__dict__.get('id')
    # Articles without an explicit format are treated as SPIP markup.
    article_format = v.__dict__.get('format', 'spip')
    local_id = v.id.split('/')[-1]

    # Read the raw article body, closing the handle explicitly.
    body_fd = open(os.path.join(DIR, 'articles', local_id))
    try:
        body = body_fd.read()
    finally:
        body_fd.close()

    if article_format == 'spip':
        html_text = parsers.makeHtmlFromSpip(body)
    elif article_format == 'rst':
        html_text = parsers.makeHtmlFromReStructuredText(body)
    else:
        # Abort with a non-zero status so callers can detect the failure.
        sys.stderr.write('Unknown format: %s\n' % article_format)
        sys.exit(1)

    # Resolve the Glasnost markers: aliases become their name, alias labels
    # are dropped, article ids become relative links and article labels
    # become the article title.
    html_text = _ALIAS_RE.sub(aliasrepl, html_text)
    html_text = _ALIAS_LABEL_RE.sub('', html_text)
    html_text = _ARTICLE_ID_RE.sub(article_id_repl, html_text)
    html_text = _ARTICLE_LABEL_RE.sub(article_title_repl, html_text)

    # Aliased articles are written as out/<alias>.html, the others as
    # out/articles/<id>.html.
    filename = raliases.get(v.id, 'articles/%s' % local_id)
    out_path = 'out/' + filename + '.html'
    # Create the parent directory in case an alias contains a
    # sub-directory that does not exist yet.
    out_dir = os.path.dirname(out_path)
    if out_dir and not os.path.isdir(out_dir):
        os.makedirs(out_dir)

    fd = open(out_path, 'w')
    try:
        fd.write(_PAGE_TEMPLATE % (article_lang, title, perms, createtms,
                                   edittms, origid, title, html_text))
    finally:
        fd.close()

It's only fair to share... Share on Facebook · Share on Google+ · Tweet about this on Twitter · Share on LinkedIn · Share on Tumblr · Email this to someone

Leave a Reply

Your email address will not be published. Required fields are marked *