Export contacts from Exchange server to LDAP

This article was first written in June 2004 for the BeezNest technical
website (http://glasnost.beeznest.org/articles/132).

This article explains how I managed to export addressbook contact information from a Microsoft Exchange server, to a LDAP server. (The final purpose was to enable all users to use a shared addressbook in Evolution).

I had first to ask the technical employees from the migrated company to export contact information as a set of CSV (Comma-Separated Values) files.

Then I wrote a small Pythin script to convert them to LDIF. The LDIF format can be used with command-line tools to insert/edit the records in a LDAP database, like this:

$ ldapadd -P 2 -x -D cn=myuser,dc=test,dc=be -w password -h ldap.test.be -f test.ldif

Here is the script I used:

#! /usr/bin/env python
# -*- coding: utf-8 -*-
# Converts .csv output from Outlook Express addressbook to .grcd GNOME adressbook

import os,sys,csv,ldif,string,re

filename = sys.argv[1]

#
#note: it seems that vcard data must be encoded as CHARSET=UTF-8;QUOTED-PRINTABLE: when they contain data other than ASCII...
#TODO

#note: there seems to be a bug in python csv module...if you don't insert all first fields,
#it doesn't detect them: if you skip field 2, it won't detect field 3, 4, etc.
#--> to report!
# see http://www.python.org/doc/2.3.2/lib/csv-contents.html
#note: we could deduce the fields dictionnary from the first line ;-)
reader = csv.DictReader(file(filename),["Titel","Voornaam","Middelste naam","Achternaam","Achtervoegsel","Bedrijf","Afdeling","Functie","Werkadres, straat","Werkadres 2, straat","Werkadres 3, straat","Werkadres, plaats","Werkadres, provincie","Werkadres, postcode","Werkadres, land","Huisadres, straat","Huisadres, straat 2","Huisadres, straat 3","Huisadres, plaats","Huisadres, provincie","Huisadres, postcode","Huisadres, land","Ander adres, straat","Ander adres, straat 2","Ander adres, straat 3","Ander adres, plaats","Ander adres, provincie","Ander adres, postcode","Ander adres, land","Telefoon assistent","Fax op werk","Telefoon op werk","Telefoon op werk 2","Terugbellen","Autotelefoon","Hoofdtelefoon bedrijf","Fax thuis","Telefoon thuis","Telefoon thuis 2","ISDN","Mobiele telefoon","Andere fax","Andere telefoon","Pager","Hoofdtelefoon","Radiotelefoon","Teksttelefoon","Telex","Account","Beroep","Categorieën","Directory-server","E-mailadres","E-mail, weergegeven naam ","E-mailadres 2","E-mail, weergegeven naam 2","E-mailadres 3","E-mail, weergegeven naam 3","Factuurinformatie","Gebruiker 1","Gebruiker 2","Gebruiker 3","Gebruiker 4","Geslacht","Gevoeligheid","Hobby's","Initialen","Kantoorlocatie","Kinderen","Locatie","Naam assistent","Naam manager","Notities","Organisatie-id","Partner","Postbus","Prioriteit","Privé","Referentie van","Reisafstand","Sofi-nummer","Speciale datum","Taal","Trefwoorden","Verjaardag","Vrije/bezette tijden voor Internet-gebruik","Webpagina"])

xlate = {
 u'N{ACUTE ACCENT}': "'",
 u'N{BROKEN BAR}': '|',
 u'N{CEDILLA}': '{cedilla}',
 u'N{CENT SIGN}': '{cent}',
 u'N{COPYRIGHT SIGN}': '{C}',
 u'N{CURRENCY SIGN}': '{currency}',
 u'N{DEGREE SIGN}': '{degrees}',
 u'N{DIAERESIS}': '{umlaut}',
 u'N{DIVISION SIGN}': '/',
 u'N{FEMININE ORDINAL INDICATOR}': '{^a}',
 u'N{INVERTED EXCLAMATION MARK}': '!',
 u'N{INVERTED QUESTION MARK}': '?',
 u'N{LATIN CAPITAL LETTER A WITH ACUTE}': 'A',
 u'N{LATIN CAPITAL LETTER A WITH CIRCUMFLEX}': 'A',
 u'N{LATIN CAPITAL LETTER A WITH DIAERESIS}': 'A',
 u'N{LATIN CAPITAL LETTER A WITH GRAVE}': 'A',
 u'N{LATIN CAPITAL LETTER A WITH RING ABOVE}': 'A',
 u'N{LATIN CAPITAL LETTER A WITH TILDE}': 'A',
 u'N{LATIN CAPITAL LETTER AE}': 'Ae',
 u'N{LATIN CAPITAL LETTER C WITH CEDILLA}': 'C',
 u'N{LATIN CAPITAL LETTER E WITH ACUTE}': 'E',
 u'N{LATIN CAPITAL LETTER E WITH CIRCUMFLEX}': 'E',
 u'N{LATIN CAPITAL LETTER E WITH DIAERESIS}': 'E',
 u'N{LATIN CAPITAL LETTER E WITH GRAVE}': 'E',
 u'N{LATIN CAPITAL LETTER ETH}': 'Th',
 u'N{LATIN CAPITAL LETTER I WITH ACUTE}': 'I',
 u'N{LATIN CAPITAL LETTER I WITH CIRCUMFLEX}': 'I',
 u'N{LATIN CAPITAL LETTER I WITH DIAERESIS}': 'I',
 u'N{LATIN CAPITAL LETTER I WITH GRAVE}': 'I',
 u'N{LATIN CAPITAL LETTER N WITH TILDE}': 'N',
 u'N{LATIN CAPITAL LETTER O WITH ACUTE}': 'O',
 u'N{LATIN CAPITAL LETTER O WITH CIRCUMFLEX}': 'O',
 u'N{LATIN CAPITAL LETTER O WITH DIAERESIS}': 'O',
 u'N{LATIN CAPITAL LETTER O WITH GRAVE}': 'O',
 u'N{LATIN CAPITAL LETTER O WITH STROKE}': 'O',
 u'N{LATIN CAPITAL LETTER O WITH TILDE}': 'O',
 u'N{LATIN CAPITAL LETTER THORN}': 'th',
 u'N{LATIN CAPITAL LETTER U WITH ACUTE}': 'U',
 u'N{LATIN CAPITAL LETTER U WITH CIRCUMFLEX}': 'U',
 u'N{LATIN CAPITAL LETTER U WITH DIAERESIS}': 'U',
 u'N{LATIN CAPITAL LETTER U WITH GRAVE}': 'U',
 u'N{LATIN CAPITAL LETTER Y WITH ACUTE}': 'Y',
 u'N{LATIN SMALL LETTER A WITH ACUTE}': 'a',
 u'N{LATIN SMALL LETTER A WITH CIRCUMFLEX}': 'a',
 u'N{LATIN SMALL LETTER A WITH DIAERESIS}': 'a',
 u'N{LATIN SMALL LETTER A WITH GRAVE}': 'a',
 u'N{LATIN SMALL LETTER A WITH RING ABOVE}': 'a',
 u'N{LATIN SMALL LETTER A WITH TILDE}': 'a',
 u'N{LATIN SMALL LETTER AE}': 'ae',
 u'N{LATIN SMALL LETTER C WITH CEDILLA}': 'c',
 u'N{LATIN SMALL LETTER E WITH ACUTE}': 'e',
 u'N{LATIN SMALL LETTER E WITH CIRCUMFLEX}': 'e',
 u'N{LATIN SMALL LETTER E WITH DIAERESIS}': 'e',
 u'N{LATIN SMALL LETTER E WITH GRAVE}': 'e',
 u'N{LATIN SMALL LETTER ETH}': 'th',
 u'N{LATIN SMALL LETTER I WITH ACUTE}': 'i',
 u'N{LATIN SMALL LETTER I WITH CIRCUMFLEX}': 'i',
 u'N{LATIN SMALL LETTER I WITH DIAERESIS}': 'i',
 u'N{LATIN SMALL LETTER I WITH GRAVE}': 'i',
 u'N{LATIN SMALL LETTER N WITH TILDE}': 'n',
 u'N{LATIN SMALL LETTER O WITH ACUTE}': 'o',
 u'N{LATIN SMALL LETTER O WITH CIRCUMFLEX}': 'o',
 u'N{LATIN SMALL LETTER O WITH DIAERESIS}': 'o',
 u'N{LATIN SMALL LETTER O WITH GRAVE}': 'o',
 u'N{LATIN SMALL LETTER O WITH STROKE}': 'o',
 u'N{LATIN SMALL LETTER O WITH TILDE}': 'o',
 u'N{LATIN SMALL LETTER SHARP S}': 'ss',
 u'N{LATIN SMALL LETTER THORN}': 'th',
 u'N{LATIN SMALL LETTER U WITH ACUTE}': 'u',
 u'N{LATIN SMALL LETTER U WITH CIRCUMFLEX}': 'u',
 u'N{LATIN SMALL LETTER U WITH DIAERESIS}': 'u',
 u'N{LATIN SMALL LETTER U WITH GRAVE}': 'u',
 u'N{LATIN SMALL LETTER Y WITH ACUTE}': 'y',
 u'N{LATIN SMALL LETTER Y WITH DIAERESIS}': 'y',
 u'N{LEFT-POINTING DOUBLE ANGLE QUOTATION MARK}': '<<',
 u'N{MACRON}': '_',
 u'N{MASCULINE ORDINAL INDICATOR}': '{^o}',
 u'N{MICRO SIGN}': '{micro}',
 u'N{MIDDLE DOT}': '*',
 u'N{MULTIPLICATION SIGN}': '*',
 u'N{NOT SIGN}': '{not}',
 u'N{PILCROW SIGN}': '{paragraph}',
 u'N{PLUS-MINUS SIGN}': '{+/-}',
 u'N{POUND SIGN}': '{pound}',
 u'N{REGISTERED SIGN}': '{R}',
 u'N{RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK}': '>>',
 u'N{SECTION SIGN}': '{section}',
 u'N{SOFT HYPHEN}': '-',
 u'N{SUPERSCRIPT ONE}': '{^1}',
 u'N{SUPERSCRIPT THREE}': '{^3}',
 u'N{SUPERSCRIPT TWO}': '{^2}',
 u'N{VULGAR FRACTION ONE HALF}': '{1/2}',
 u'N{VULGAR FRACTION ONE QUARTER}': '{1/4}',
 u'N{VULGAR FRACTION THREE QUARTERS}': '{3/4}',
 u'N{YEN SIGN}': '{yen}'
}

nonasciire = re.compile(u'([x00-x7f]+)|([^x00-x7f])', re.UNICODE).sub

def latin1_to_ascii (unicrap):
    return str(nonasciire(lambda x: x.group(1) or xlate.setdefault(ord(x.group(2)), ''), unicrap))

def addIfNonNullElements(dict, name, list):
    """
    adds non-null elements otf this list as value in the dictionnary
    """
    reduced = filter(lambda x: x, list)
    reduced = map(toUTF8, reduced)
    if len(reduced) > 0:
        dict[name] = reduced

def isAscii(ustring):
    """returns true if unicode string can be encoded as ascii string. false otherwise"""
    try:
        ustring.encode('ascii')
        #print "%s is ascii" % ustring
        return True
    except (UnicodeEncodeError,UnicodeDecodeError):
        #print "%s is not ascii" % ustring
        return False

def toUTF8(string):
    return string.decode('utf-8').encode('utf-8')

def cleanAddress(string):
    if not string.replace('$', '').replace(' ','') == '':
        return toUTF8(string)
    else:
        return None

def joinNonNulls(stringList, separator):
    return string.join(filter(lambda x: not x == '', stringList), separator)

#skip first line;
next = reader.next()
for next in reader:

    #hack to correct entries who have first name and family name under 'Voornaam'
    #or who do not have any name...;-)
    if not next['Achternaam']:
        list = string.split(next['Voornaam'])
        if len(list) == 2:
            next['Voornaam'], next['Achternaam'] = tuple(list)
        elif len(list) == 3:
            next['Voornaam'] = list[0]
            next['Achternaam'] = list[1] + ' ' + list[2]
        elif len(list) == 4:
            next['Voornaam'] = list[0]
            next['Achternaam'] = list[1] + ' ' + list[2] + ' ' + list[3]
        elif len(list) == 1:
            next['Voornaam'] = list[0]
            next['Achternaam'] = list[0]
        elif len(list) == 0:
            next['Voornaam'] = next['Bedrijf']
            next['Achternaam'] = next['Bedrijf']
        else:
            raise Exception('problem splitting name: ' + repr(list))

    cn = toUTF8(next['Voornaam'] + ' ' + next['Achternaam'])
    #if not isAscii(cn):
    #  cn = toUTF8(cn)
    dn = toUTF8('cn=' + cn + ',ou=People,dc=profondo-brussels,dc=be')
    print "#%s" % dn  
    entry={'objectClass':['top','person', 'organizationalPerson', 'inetOrgPerson', 'evolutionPerson']}
    addIfNonNullElements(entry,'cn', [cn])
    addIfNonNullElements(entry, 'sn', [next['Achternaam']])

    #additional hack: who would use non-ascii chars in e-mail? ;-)
    emails =  map(latin1_to_ascii, filter(lambda x: x, [next['E-mailadres'], next['E-mailadres 2'], next['E-mailadres 3']]))

    addIfNonNullElements(entry, 'mail', emails)
    addIfNonNullElements(entry, 'fileAs', [next['Achternaam'] + ', ' + next['Voornaam']])
    addIfNonNullElements(entry,'category',[sys.argv[2]])
    addIfNonNullElements(entry,'o',[next['Bedrijf']])
    addIfNonNullElements(entry,'mobile',[next['Mobiele telefoon']])
    addIfNonNullElements(entry,'note',[next['Notities']])
    addIfNonNullElements(entry,'telephoneNumber',[next['Telefoon op werk']])
    addIfNonNullElements(entry,'homePhone',[next['Telefoon thuis']])
    addIfNonNullElements(entry,'facsimileTelephoneNumber',[next['Fax op werk']])
    #entry['ou'] =
    #entry['roomNumber'] =
    addIfNonNullElements(entry,'title',[next['Functie']])
    #entry['businessRole'] =
    #entry['managerName'] =
    #entry['assistantName'] =
    #strange format to store addresses...
    #see http://www.faqs.org/rfcs/rfc2252.html --> 6.27. Postal Address
    addIfNonNullElements(entry,'postalAddress', [cleanAddress(joinNonNulls([next['Werkadres, straat'], next['Werkadres 2, straat'], next['Werkadres, plaats'] + ' ' + next['Werkadres, provincie'] + ' ' + next['Werkadres, postcode'], next['Werkadres, land']], 'n'))])
    addIfNonNullElements(entry,'homePostalAddress', [cleanAddress(joinNonNulls([next['Huisadres, straat'], next['Huisadres, straat 2'], next['Huisadres, plaats'] + ' ' + next['Huisadres, provincie'] + ' ' + next['Huisadres, postcode'], next['Huisadres, land']], 'n'))])
    addIfNonNullElements(entry,'otherPostalAddress', [cleanAddress(joinNonNulls([next['Ander adres, straat'], next['Ander adres, straat 2'], next['Ander adres, plaats'] + ' ' + next['Ander adres, provincie'] + ' ' + next['Ander adres, postcode'], next['Ander adres, land']], 'n'))])

    ldif_writer=ldif.LDIFWriter(sys.stdout)
    ldif_writer.unparse(dn,entry)