#!/usr/bin/python

### THIS SCRIPT IS NOW DEAD TO ME.
#
# I wrote it as a one-off to migrate my posts from bzero to wordpress.  It
# could easily be adapted to handle just about any content.  See the Post
# class's read() method, as it is the only method that reads old content (in
# conjunction with fullPath).
#
# Usage:
#   1. scp extra/ to files/ under the weblog root
#   2. configure the variables shown below
#   3. run the script.  It will tell you what categories need to be created;
#      create them on the server or remap them in categoryMap below.
#
# The code below sticks to constructs that are valid in both Python 2 and
# Python 3.

import os
import sys
import time
from pprint import pprint

try:
    import xmlrpclib  # Python 2
except ImportError:
    import xmlrpc.client as xmlrpclib  # Python 3 name of the same module

# --- Configuration ---------------------------------------------------------

RPCURL = 'http://www.friday.com/bbum/xmlrpc.php'
BASEURL = 'http://www.friday.com/bbum/'
# BASEURL already ends with '/', so strip it before appending to avoid
# producing '.../bbum//files/'.
FILEURL = '%s/files/' % BASEURL.rstrip('/')
USER = ''
PASS = ''
BZEROCONTENT = '~/.bzero/bbum@mac.com'
BASEBADURL = 'http://www.pycs.net/bbum/'
ALTBADURL = 'http://bbum.pycs.net/'

rpcVerbose = 0

BZEROCONTENTdata = os.path.expanduser(os.path.join(BZEROCONTENT, 'data'))
BZEROCONTENTextra = os.path.expanduser(os.path.join(BZEROCONTENT, 'extra'))

# Constructing the proxy does not contact the server; calls do.
server = xmlrpclib.ServerProxy(RPCURL, verbose=rpcVerbose)

# Fields merged into every post sent to metaWeblog.newPost.
baseContentDict = {
    'mt_allow_comments': 1,
    'mt_allow_pings': 1,
}

# Old category name -> new category name.  A value of None drops the
# category.  The map is applied once per name (see categoryFilter); results
# are not looked up again, so e.g. "Rant" becomes "Rants", not "Irritants".
categoryMap = {
    "Blogosphere": "Weblogging",
    "Metalog": "Weblogging",
    "CoreData": "Core Data",
    "Creatures": "Nature",
    "flickr": "Flickr",
    "Hack": "Hacks",
    "Design": "Industrial Design",
    "Employment": "Jobs",
    "OS X": "Mac OS X",
    "Photo": "Photography",
    "Photos": "Photography",
    "Rant": "Rants",
    "Weblogs": "Weblogging",
    "Silliness": "Humor",
    "life": "Life",
    "reStructured Text": None,
    "ReStructured Text": None,
    "tcpflow": None,
    "Address Book": None,
    "Animation": "Entertainment",
    "BitTorrent": None,
    "CSS": None,
    "Cayman": None,
    "Coffee": None,
    "DarwinPorts": None,
    "Dashboard": None,
    "Death": "Life",
    "Desktop Software": "Software",
    "Downloads": "Software",
    "Electricity": "Science",
    "Environment": "Nature",
    "Facts of Life": "Life",
    "Flickr": None,
    "Cooking": "Food",
    "Fun": "Entertainment",
    "House": None,
    "Intel": None,
    "Java": "Code",
    "Legos": None,
    "Magnetism": "Science",
    "Make": "Hacks",
    "MarkDown": None,
    "Microsoft Word": None,
    "Network": None,
    "Objective-C": "Code",
    "P2P": None,
    "PDF": None,
    "Party": "Entertainment",
    "Pathetic": "Irritants",
    "Phone": None,
    "Plants": "Nature",
    "Politics": "Government",
    "Privacy": "Government",
    "Programming Languages": "Code",
    "Python": "Code",
    "Quartz Composer": None,
    "RSS": None,
    "Rants": "Irritants",
    "ReSTedit": "Software",
    "Screen Saver": "Software",
    "Security": None,
    "Starbucks": None,
    "Storage": None,
    "Stupidity": "Irritants",
    "Subversion": None,
    "Tides": None,
    "Tiger": "Mac OS X",
    "Unit Testing": None,
    "Unix": None,
    "Video Games": "Entertainment",
    "WWDC": None,
    "WebObjects": None,
    "Wildlife": "Nature",
    "World of Warcraft": "Entertainment",
    "humor": "Entertainment",
    "iMovie": "Mac OS X",
    "iTunes": "Mac OS X",
    "iTunes Music Store": "Entertainment",
    "reStructuredText": None,
}

# Category name -> server category id.  Filled in by main() from
# verifyCategories() and read by Post.publish().
serverCategories = {}

# Every Post discovered by dataWalk(), in discovery order.
posts = []


def categoryFilter(aCategory):
    """Translate an old category name through categoryMap.

    Returns None for an empty name or a name mapped to None (i.e. the
    category is dropped), the mapped name when one exists, and the name
    itself when it is not in categoryMap.
    """
    if not aCategory:
        return None
    return categoryMap.get(aCategory, aCategory)


class Post:
    """One bzero post file, lazily parsed and publishable over XML-RPC."""

    # Names of all categories seen across every Post that has been read.
    # Used as a set; only the keys matter.
    categories = {}

    def __init__(self, fullPath):
        """Remember the path of the post file; nothing is read yet."""
        self.fullPath = fullPath
        self.ite = None
        self.topics = None
        self.title = None
        self.dateTuple = None
        self.body = ""
        self._read = False
        self.postResult = None
        self.rewritten = False

    def dump(self):
        """Print a short human-readable summary of the post to stdout."""
        print("### Post\n\t%s" % self.fullPath)
        self.read()
        print("\tdate: %s" % (self.dateTuple,))
        print("\ttitle: %s" % (self.title))
        print("\tite: %s" % (self.ite))
        print("\ttopics: %s" % (self.topics))
        print("\tbody len: %d" % (len(self.body)))

    def read(self):
        """Parse the post file into attributes; a no-op after the first call.

        The first line, minus its first five characters, is evaluated as the
        date tuple.  Following lines that start with '%' are headers of the
        form '%name value' and are stored as attributes called 'name'; the
        'topics' and 'ite' headers are comma-separated lists that are run
        through categoryFilter().  The first non-header line is discarded
        and everything after it becomes the body.  All topics found are
        recorded in Post.categories.
        """
        if self._read:
            return
        self._read = True

        with open(self.fullPath, 'r') as f:
            # Skip the five-character tag at the start of the first line
            # (presumably '%date' -- confirm against real bzero files).
            dateText = f.readline()[5:].strip()
            if dateText:
                # SECURITY NOTE(review): eval() runs whatever expression the
                # file contains.  Acceptable only because the input is the
                # author's own local bzero content; never point this at
                # untrusted files.
                self.dateTuple = eval(dateText)

            for line in f:
                if not line.startswith('%'):
                    # End of headers.  This line is consumed and dropped,
                    # exactly as the original loop did (presumably it is
                    # the blank separator before the body).
                    break
                parts = line.split(' ', 1)
                head = parts[0][1:].strip()
                # A header with no value would otherwise raise IndexError.
                value = parts[1] if len(parts) > 1 else ''
                if head in ('topics', 'ite'):
                    mapped = [categoryFilter(x.strip())
                              for x in value.split(',')]
                    # Drop empty names and categories mapped to None.
                    setattr(self, head, [x for x in mapped if x])
                else:
                    setattr(self, head, value.strip())

            # Keep consuming the same iterator so the body starts right
            # after the discarded separator line.
            self.body = ''.join(f)

        if self.topics:
            for aTopic in self.topics:
                Post.categories[aTopic] = 1

    def pubTitle(self):
        """Return the post title, or '<>' when the post has none."""
        if not self.title:
            return '<>'
        return self.title

    def pubDate(self):
        """Return the post date as an XML-RPC DateTime.

        A post without a date is stamped (and remembered) with the current
        local time.
        """
        if not self.dateTuple:
            self.dateTuple = time.localtime()
        return xmlrpclib.DateTime(self.dateTuple)

    def _categoryStructs(self, categoryIds):
        """Build the category list for mt.setPostCategories.

        categoryIds maps category name -> server category id.  Returns one
        {'categoryId': id} dict per distinct id among this post's topics,
        ordered by id.  Several topics can map to the same category, so ids
        are de-duplicated.  Raises KeyError for a topic missing from
        categoryIds.
        """
        uniqueIds = set(categoryIds[aTopic] for aTopic in self.topics)
        return [{'categoryId': anId} for anId in sorted(uniqueIds)]

    def publish(self):
        """Create the post on the server and attach its categories.

        Stores the value returned by metaWeblog.newPost in self.postResult
        and prints a warning when setting the categories reports failure.
        """
        self.read()
        contentDict = {}
        contentDict.update(baseContentDict)
        contentDict['title'] = self.pubTitle()
        contentDict['description'] = self.body
        contentDict['dateCreated'] = self.pubDate()
        print('Posting: %s' % self.pubTitle())
        self.postResult = server.metaWeblog.newPost(
            0, USER, PASS, contentDict, 1)
        if self.topics:
            categories = self._categoryStructs(serverCategories)
            r = server.mt.setPostCategories(
                self.postResult, USER, PASS, categories)
            if not r:
                print("*** FAILED TO SET CATEGORIES *** {%s} %s"
                      % (self.title, self.fullPath))

    def checkSanityWithURL(self, badURL):
        """Rewrite every URL in the body that starts with badURL.

        A URL runs from badURL up to the next double quote (or to the end
        of the body when there is none).  If the remainder of the URL names
        a file under BZEROCONTENTextra it is redirected to FILEURL;
        otherwise, if the URL is at most 50 characters long, it is
        redirected to BASEURL with any '#fragment' removed.  Longer
        unmatched URLs are reported as BAD and left untouched.  Sets
        self.rewritten when anything was changed.
        """
        self.read()
        currentIndex = 0
        while True:
            problem = self.body.find(badURL, currentIndex)
            if problem == -1:
                break
            endProblem = self.body.find('"', problem)
            if endProblem == -1:
                # No closing quote: treat the URL as running to the end of
                # the body instead of slicing with -1.
                endProblem = len(self.body)

            path = self.body[problem + len(badURL):endProblem]
            # NOTE(review): the literal "@20" is kept exactly as found;
            # confirm this is how spaces were encoded in the old URLs (it
            # may have been meant as "%20").
            path = path.replace("@20", " ")

            if os.path.isfile(os.path.join(BZEROCONTENTextra, path)):
                replacement = FILEURL + path.replace(" ", "%20")
            elif (endProblem - problem) > 50:
                # Too long to be a plain permalink; flag it for manual
                # repair and move past it.
                currentIndex = endProblem
                print("BAD:\n%s" % self.fullPath)
                print(">>>>%s<<<<" % self.body[problem:endProblem])
                continue
            else:
                replacement = BASEURL + path.split('#')[0]

            self.body = (self.body[:problem] + replacement
                         + self.body[endProblem:])
            # Resume scanning immediately after the text just inserted.
            currentIndex = problem + len(replacement)
            self.rewritten = True

    def checkSanity(self):
        """Rewrite links that point at either of the old weblog URLs."""
        self.checkSanityWithURL(BASEBADURL)
        self.checkSanityWithURL(ALTBADURL)


def verifyCategories(categories):
    """Check that every needed category exists on the server.

    categories is an iterable of category names; it is not modified.
    Returns a dict mapping server category name -> category id.  If any
    requested category is missing, the missing names are printed in sorted
    order and the script exits (status 0), since they have to be created on
    the server by hand.
    """
    rawCategories = server.mt.getCategoryList(0, USER, PASS)
    knownCategories = {}
    for aCat in rawCategories:
        knownCategories[aCat['categoryName']] = aCat['categoryId']

    categoriesToCreate = [aCat for aCat in sorted(categories)
                          if aCat not in knownCategories]
    if categoriesToCreate:
        print("Unfortunately, no API exists to create categories.")
        print("The following categories will need to be created "
              "on the server:")
        for aCat in categoriesToCreate:
            print(aCat)
        sys.exit(0)

    return knownCategories


def dataWalk(arg, dir, files):
    """Directory visitor: append a Post to posts for each post file in dir.

    Keeps the (arg, dirname, names) signature used by os.path.walk
    visitors; arg is ignored.  Names starting with '#', names ending with
    '~' and anything that is not a regular file are skipped.
    """
    for f in files:
        if f.startswith('#'):
            continue
        fullPath = os.path.join(dir, f)
        if os.path.isfile(fullPath) and not fullPath.endswith('~'):
            posts.append(Post(fullPath))


def main():
    """Collect all posts, fix their links, verify categories, publish."""
    global serverCategories

    # os.path.walk does not exist on Python 3; os.walk visits the same
    # directories top-down.
    for dirPath, dirNames, fileNames in os.walk(BZEROCONTENTdata):
        dataWalk(None, dirPath, dirNames + fileNames)

    for p in posts:
        p.checkSanity()

    serverCategories = verifyCategories(list(Post.categories))
    pprint(serverCategories)

    for p in posts:
        p.publish()


if __name__ == '__main__':
    main()