Skip to content

Instantly share code, notes, and snippets.

@lkraav
Created November 17, 2009 20:57

Revisions

  1. lkraav created this gist Nov 17, 2009.
    259 changes: 259 additions & 0 deletions 8kzenrecover.py
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,259 @@
    #!/usr/bin/python

    # Copyright 2007 by Tobia Conforto <[email protected]>
    #
    # This program is free software; you can redistribute it and/or modify it under the terms of the GNU General
    # Public License as published by the Free Software Foundation; either version 2 of the License, or (at your
    # option) any later version.
    #
    # This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the
    # implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
    # for more details.
    #
    # You should have received a copy of the GNU General Public License along with this program.
    # If not, see http://www.gnu.org/licenses/

    # Versions: 0.1 2007-08-13 Initial release
    # 0.2 2008-05-12 Small fixes for Zen Xtra models
    # 0.3 2009-02-23 Zen Vision M compatible version (Leho Kraav <[email protected]>)

    from __future__ import division
    import sys, os, codecs, array, time, operator, getopt
    import LRU

    class CFS:
    # 32k cluster on visionm30
    clusterSize = 0x8000
    cacheMem = 10 * 2**20 # keep 20MB of recently read clusters in ram

    def __init__(self, filename, offset = 0):
    '''Filename and optional offset where the CFS filesystem begins
    (offset of cluster -1, the one filled with 0xff)'''
    self.image = file(filename)
    self.offset = offset
    self.clusterCache = LRU.LRU(self.cacheMem // self.clusterSize)

    def __getitem__(self, key):
    '''Get the nth CFS cluster from the image and cache it for later usage.
    Accepts simple slices of clusters, but doesn't process negative indices.
    In any case it returns the requested data as a byte string.'''
    if isinstance(key, slice):
    cstart, cstop = key.start, key.stop
    else:
    cstart, cstop = key, key + 1
    data = ''
    for cluster in range(cstart, cstop):
    if cluster not in self.clusterCache:
    self.image.seek(self.offset + (cluster + 1) * self.clusterSize)
    self.clusterCache[cluster] = self.image.read(self.clusterSize)
    data += self.clusterCache[cluster]
    return data

    def get_byteswapped_data(self, cluster):
    '''Get the nth CFS cluster from the image, without caching it.
    Swap the position of every two bytes and return it as an array object.
    This method is designed for bulk file retrieving.'''
    a = array.array('H')
    self.image.seek(self.offset + (cluster + 1) * self.clusterSize)
    a.fromfile(self.image, self.clusterSize // 2)
    # visionm 30 doesnt need byte swapping
    # a.byteswap()
    return a

    def inode(self, cluster):
    return CFSInode(self, cluster)

    #def pdp_uint32(data, offset = 0):
    # o2, o1, o4, o3 = map(ord, data[offset : offset + 4])
    # return (o1 << 24) | (o2 << 16) | (o3 << 8) | o4

    def pdp_uint32(data, offset = 0):
    o4, o3, o2, o1 = map(ord, data[offset : offset + 4])
    return (o1 << 24) | (o2 << 16) | (o3 << 8) | o4

    def pdp_uint16(data, offset = 0):
    o2, o1 = map(ord, data[offset : offset + 2])
    return (o1 << 8) | o2

    def ucs2string(data, offset, length): # length in bytes
    return codecs.utf_16_le_decode(data[offset : offset + length])[0]

    def pdp_getbit(bitmap, bit_no):
    return (pdp_uint32(bitmap, bit_no // 32 * 4) >> (bit_no % 32)) & 1

class CFSInode:
    '''A parsed CFS inode: metadata (name, path, size) plus the flat list of
    data clusters making up the file contents.'''

    # Class-level defaults, used when the corresponding metadata tag is
    # absent from the inode (directory inodes carry no metadata at all).
    filename = '(no filename)'
    filesize = 0
    path = []

    def __init__(self, cfs, cluster):
        '''Parse the inode stored at `cluster` of the CFS image `cfs`.
        For directory inodes this also reads all directory entries into
        self.direntries.'''
        self.cluster = cluster
        self.cfs = cfs
        inode = cfs[cluster]
        # reading misc flags and values
        print "pdp_uint: %x" % pdp_uint32(inode[4:8])
        print "cluster: %x" % cluster
        assert pdp_uint32(inode[4:8]) == cluster # self-reference
        # serial number lives at 0x78 (the root inode's serial is 0xFFFFFFFF)
        self.serial = pdp_uint32(inode, 0x78)
        # reading metadata: count at 0x7c, entries start at 0x80; each entry
        # is a 10-byte header (type word, value length, 4-byte UCS-2 tag)
        # followed by the value bytes
        count_metadata = pdp_uint32(inode, 0x7c)
        offset = 0x80
        self.metadata = {}
        for i in range(count_metadata):
            assert pdp_uint16(inode, offset) == 3
            length = pdp_uint16(inode, offset + 2)
            tag = ucs2string(inode, offset + 4, 4)
            self.metadata[tag] = inode[offset + 10 : offset + 10 + length]
            # byte reordering issue, 07 -> 70, 0= -> =0, 0> -> >0
            # but we cannot figure out where to get path info, tag '51' doesnt work
            if tag == '70':
                # '70' carries the UCS-2 filename (length includes a terminator)
                self.filename = ucs2string(inode, offset + 10, length - 2)
            elif tag == '51':
                # '51' carries a backslash-separated path, split into components
                self.path = ucs2string(inode, offset + 10, length - 2).strip('\\').split('\\')
            elif tag == '>0':
                # '>0' carries the file size in bytes
                self.filesize = pdp_uint32(inode, offset + 10)
            offset += 10 + length
        # collecting flat list of data clusters
        # direct cluster pointers at 0x20..0x4c; 0xFFFFFFFF marks an unused slot
        self.dataclusters = []
        pointerclusters = []
        for off in range(0x20, 0x4c + 1, 4):
            c = pdp_uint32(inode, off)
            if c != 0xFFFFFFFFL:
                self.dataclusters.append(c)
        # singly-indirect pointer cluster at 0x58
        second_class_chain = pdp_uint32(inode, 0x58)
        if second_class_chain != 0xFFFFFFFFL:
            pointerclusters.append(second_class_chain)
        # doubly-indirect: the cluster named at 0x64 lists more pointer clusters
        third_class_chain = pdp_uint32(inode, 0x64)
        # change 0x2000 to 0x8000 -> we might have 32k clusters on visionm 30g
        if third_class_chain != 0xFFFFFFFFL:
            for off in range(0, 0x8000, 4):
                c = pdp_uint32(cfs[third_class_chain], off)
                if c == 0xFFFFFFFFL:
                    break
                pointerclusters.append(c)

        # once again, 32k cluster
        # resolve each pointer cluster into the data clusters it lists
        # (0xFFFFFFFF terminates the list)
        for pnt in pointerclusters:
            for off in range(0, 0x8000, 4):
                c = pdp_uint32(cfs[pnt], off)
                if c == 0xFFFFFFFFL:
                    break
                self.dataclusters.append(c)
        # reading directory entries
        if not self.metadata: # any better way of telling dirs and files apart?
            # NOTE(review): pdp_uint32(self, 8) reads the entry count from the
            # file *contents* (via our own __getitem__), not from the inode
            # cluster — looks intentional, but verify against a known image.
            count_direntries = pdp_uint32(self, 8)
            self.direntries = []
            found = 0
            # since clusters are 4 times bigger now, we need % 2 and // 2 instead of ... 8
            assert len(self.dataclusters) % 2 == 0
            # directory data is scanned in 64KB (two-cluster) blocks: a bitmap
            # at byte 16 (204 bytes = 1632 bits) flags which of the 1632
            # 40-byte entry slots starting at byte 220 are live
            for block_no in range(len(self.dataclusters) // 2):
                block = self[block_no * 0x10000 : block_no * 0x10000 + 0x10000]
                bitmap = block[16 : 16 + 204]
                for n in range(1632):
                    if pdp_getbit(bitmap, n):
                        off = 220 + n * 40
                        self.direntries.append(CFSDirEntry(cfs, block[off : off + 40]))
                        found += 1
            assert found == count_direntries

    def __getitem__(self, key):
        '''Returns the given byte (or byte slice) from the file contents.'''
        if isinstance(key, slice):
            bstart, bstop = key.start, key.stop
        else:
            bstart, bstop = key, key + 1
        cs = self.cfs.clusterSize
        # clusters spanned by the requested byte range
        cstart = bstart // cs
        cstop = (bstop - 1) // cs + 1
        data = ''.join([ self.cfs[x] for x in self.dataclusters[cstart : cstop] ])
        return data[bstart - cs * cstart : bstop - cs * cstart]

    class CFSDirEntry:
    def __init__(self, cfs, entrydata):
    self.cluster = pdp_uint32(entrydata) # cluster no. of the inode
    # length of full filename
    self.len_filename = pdp_uint16(entrydata, 4)
    # first 15 chars of filename
    self.shortname = ucs2string(entrydata, 8, min(30, self.len_filename * 2))

    if __name__ == '__main__':

    # commandline arguments
    optlist, args = getopt.gnu_getopt(sys.argv[1:], 'o:')
    opts = dict(optlist)
    offset = int(opts.get('-o', 20 * 2**20))

    if len(args) != 3:
    print 'Usage: zenrecover.py [-o OFFSET] DISK_OR_IMAGE SECTION OUTPUT_DIR'
    print 'DISK_OR_IMAGE is the disk containing the filesystem, or an image thereof'
    print 'OFFSET is the offset at which the filesystem starts (in bytes, default 20M)'
    print 'SECTION is the section of the filesystem to recover: "archives" or "songs"'
    print 'OUTPUT_DIR is the directory in which to place the recovered files'
    sys.exit(1)

    cfs = CFS(args[0], offset)
    section = args[1]
    outdir = args[2]

    # find the root inode
    rootinode = None
    for c in range(4, 0x10000):
    if pdp_uint32(cfs[c][:4]) == 0x3bbe0ad9:
    print "Found inode at cluster 0x%x" % c
    i = cfs.inode(c)
    if i.serial != 0xFFFFFFFFL:
    print "Found inode at cluster 0x%x, but serial number is not -1" % c
    continue
    rootinode = i
    break
    if not rootinode:
    raise "Could not find the root inode"

    # find the root directories
    root = {}
    for entry in rootinode.direntries:
    root[entry.shortname] = entry.cluster

    print root

    # begin recovery
    dirinode = cfs.inode(root[section])
    os.makedirs(outdir)
    lastfiles = [(1,1)] # timing of latest few files recovered (size in bytes, time in secs)
    t = len(dirinode.direntries)
    for i, entry in enumerate(dirinode.direntries):
    if entry.shortname != '.':
    t0 = time.time()
    inode = cfs.inode(entry.cluster)
    print
    m=inode.metadata
    for j in m:
    if len(m[j])==4:
    print repr(j), pdp_uint32(m[j])
    else:
    print repr(j), repr(''.join([m[j][x] for x in range(0,len(m[j]),2)]))
    print '\r%d%% %.1fMB/s "%s" (%.1fMB)\033[K' % (
    i * 100 // t,
    operator.truediv(*map(sum, zip(*lastfiles))) / 2**20,
    inode.filename[:50],
    inode.filesize / 2**20),
    sys.stdout.flush()
    path = os.path.join(outdir, *inode.path)
    try:
    os.makedirs(path)
    except:
    pass
    f = file(os.path.join(path, inode.filename), 'w')
    remaining = inode.filesize
    for c in inode.dataclusters:
    if remaining >= cfs.clusterSize:
    cfs.get_byteswapped_data(c).tofile(f)
    else:
    f.write(cfs.get_byteswapped_data(c).tostring()[:remaining])
    remaining -= min(cfs.clusterSize, remaining)
    f.close()
    assert remaining == 0
    if len(lastfiles) >= 32: #transfer speed is calculated on latest 32 files
    lastfiles.pop(0)
    lastfiles.append((inode.filesize, time.time() - t0))
    print '\rDone.\033[K'