Forked from tetrillard/smsbackuprestore-extractor.py
Last active
May 28, 2025 16:16
-
-
Save i3roly/e5ec063e561af48c30c4c045746b92fc to your computer and use it in GitHub Desktop.
SMS Backup & Restore : Extract images and videos from a backup file
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# -*- coding: utf8 -*- | |
# SMSBackupRestore extractor | |
# | |
# smsbackuprestore-extractor.py | |
# 24/11/2014 | |
# | |
# This script will extract all images and videos retrieved | |
# from a xml backup of the Android application 'SMS Backup & Restore'. | |
# For each contact, it will create a folder inside the output folder | |
# with all received images and videos. | |
# | |
# Make sure the destination folder is empty otherwise it will create duplicates. | |
# | |
# Links : | |
# https://play.google.com/store/apps/details?id=com.riteshsahu.SMSBackupRestore | |
# | |
# example: python smsbackuprestore-extractor.py sms-20141122183844.xml medias/ | |
########## | |
# modifications for SMSBackup, originally written for blackberry 10, made by broly | |
# just so i can transition all my stuff to smooth-brain android properly, allowing me to | |
# cleanly/properly transition back to QNX when the time comes in the near-future | |
# how bb10 users can cope with this trash is beyond me. this shit (android) reeks of | |
# "input lag" that those "gamers" claim to observe*. clunky, unresponsive, shitty | |
# way to enter the task manager. it's just all-around inferior to QNX. | |
# (*as if they're 3sds above the mean, plz bois) | |
# | |
# THREE INPUT ARGUMENTS: | |
# <input.xml> from SMSBackup on Blackberry 10 (along with <input.xml.mms> in the same directory) | |
# <output.xml> name of output xml that will embed the data into the xml file | |
# <phone_number> the phone number of the user to whom the text messages belong | |
# - i am sure you can get away using anything here, but since there is now | |
# an <addrs> field that discloses the sender/recipient (i haven't looked at broadcast mms)
# it's good to put something here. | |
from __future__ import print_function

import base64
import glob
import io
import logging
import os
import re
import string
import sys
import time
import uuid

from lxml import etree
from lxml.etree import XMLParser
DEBUGMODE = False | |
IMAGEDATA_DEBUG = False | |
#it's a large database. well, for me it is. | |
p = XMLParser(huge_tree=True, remove_blank_text=True); | |
### XML ETREE SORTING CODE BY ZESK | |
# | |
# https://stackoverflow.com/a/46128043 | |
# | |
## | |
def get_node_key(node, attr=None):
    """Return the sorting key of an xml node.

    node: an element exposing ``tag``, ``attrib`` and ``get()``.
    attr: name of the attribute to sort on, or None to sort on the tag
        followed by every attribute value (taken in attribute-name order).

    Returns an int when ``attr`` is ``'date'`` (0 if the node has no date),
    otherwise a string.  Raises ValueError if a ``date`` value is not an
    integer literal.
    """
    if attr is None:
        return '%s' % node.tag + ':'.join([node.get(name)
                                           for name in sorted(node.attrib)])
    # Dates are compared numerically: some messages store the epoch with
    # more digits than others, and comparing them as strings would order
    # them incorrectly.
    if attr == 'date':
        epochs = node.get(attr)
        if epochs is not None:
            return int(epochs)
        return 0
    if attr in node.attrib:
        # Key on the requested attribute's value, not on the whole
        # attribute mapping.
        return '%s:%s' % (node.tag, node.get(attr))
    return '%s' % node.tag
def sort_children(node, attr=None):
    """Recursively reorder the children of *node* in place.

    Children are ordered by ``get_node_key(child, attr)``; with ``attr`` set
    to None that key covers the tag and all attributes.  A node whose tag is
    not a string (the original author notes these are comments or data) is
    left untouched and not descended into.
    """
    if isinstance(node.tag, str):  # PYTHON 2: use basestring instead
        def child_key(child):
            return get_node_key(child, attr)

        # Order this level first, then descend into each child.
        node[:] = sorted(node, key=child_key)
        for descendant in node:
            sort_children(descendant, attr)
def sort(unsorted_file, sorted_file, attr=None):
    """Parse *unsorted_file*, sort its elements, and write *sorted_file*.

    The tree is parsed with the module-level parser ``p`` and ordered with
    ``sort_children(root, attr)``.  Afterwards every element that carries a
    ``date`` attribute is given a fresh ``_id``, counting up from 1 in
    document order, so ids follow the sorted order.
    """
    document = etree.parse(unsorted_file, p)
    top = document.getroot()
    sort_children(top, attr)

    next_id = 0
    for element in top.iter():
        if element.get('date') is None:
            continue
        next_id += 1
        element.set('_id', str(next_id))

    document.write(
        sorted_file,
        encoding="utf-8",
        pretty_print=True,
        xml_declaration=True,
        standalone="yes",
    )
### END SORTING CODE MADE BY ZESK | |
### BEGIN CODE ORIGINALLY CREATED BY tetrillard:
# | |
# https://gist.github.com/tetrillard/759bf2d165b440e4915c | |
# | |
## | |
# Command line: <input.xml> <output-name> <phone-number>.
# All three positional arguments are read below, so all three are required.
if len(sys.argv) < 4:
    print('usage: %s [sms-backup.xml] [output-name] [phone-number]' % sys.argv[0])
    sys.exit(-1)
INPUT_FILE = sys.argv[1]
# Base name only: '.xml' is appended when the result is written.
OUTPUT_FILE_NAME = sys.argv[2]
YOUR_PHONE_NUMBER = sys.argv[3]
if not os.path.isfile(INPUT_FILE):
    print('File %s not found' % INPUT_FILE)
    # Nothing can be parsed without the input file, so stop here.
    sys.exit(-1)
# The BB10 MMS folder sits next to the input file and has the same name
# with '.mms' appended.
print('[*] Parsing : %s' % INPUT_FILE)
MMS_FOLDER = INPUT_FILE + '.mms'
tree = etree.parse(INPUT_FILE)
mms_list = tree.xpath('.//*[self::sms or self::mms]')
total = 0

# Second tree: receives every message, with the BB10 SMS entries that have
# media re-created as MMS elements.
newtree = etree.Element("smses")
# 'count' is copied from the input root rather than recomputed; per the
# original author's note, an overshooting count is tolerated by the app.
for root_attr, root_value in (
    ('count', tree.getroot().get('count')),
    ('backup_set', '74300aaa-ba82-44a2-87cd-b7eac8387cfd'),
    ('backup_date', str(int(time.time()))),
    ('type', 'full'),
):
    newtree.set(root_attr, root_value)

# Two leading comments are placed in front of the root element.
for banner_text in (
    'File Created By SMS Backup & Restore v10.21.004 on ' + time.strftime('%d/%m/%Y %H:%M:%S'),
    '\n\nTo view this file in a more readable format, visit https://synctech.com.au/view-backup/\n\n',
):
    newtree.addprevious(etree.Comment(banner_text))
def sortchildrenby(parent, attr):
    """Reorder the children of *parent* in place by their *attr* value."""
    def attribute_value(child):
        return child.get(attr)

    ordered = sorted(parent, key=attribute_value)
    parent[:] = ordered
# Sentinel tokens.  They are written into attribute values in place of
# characters the XML writer would escape, and are swapped for the real
# characters once the output file has been written.
_AMP = 'GETTINGTOOAMPEDUPDEALINGWITHPYTHON'   # replaced by &
_QUOT = 'LOLQUOATTHEGOATLOVESOATSLOL'         # replaced by "
_LT = _AMP + 'lt;'
_GT = _AMP + 'gt;'
_TEXT_OPEN = '1337STARTQUOTE1337'             # with the adjacent " replaced by '
_TEXT_CLOSE = '1337ENDQUOTE1337'              # with the adjacent " replaced by '


def _smil_attr(name, value):
    """Return ``name="value"`` spelled with the quote sentinel."""
    return name + '=' + _QUOT + value + _QUOT


def _smil_region(*attrs):
    """Return one self-closed SMIL <region .../> built from (name, value) pairs."""
    rendered = ' '.join(_smil_attr(name, value) for name, value in attrs)
    return _LT + 'region ' + rendered + '/' + _GT


def _attach_payload(part, directory, suffix, is_binary):
    """Load the media file(s) in *directory* into *part*; return their byte size.

    Files are located by extension because the file name does not always
    match the name of its parent directory.  Binary payloads are stored
    base64-encoded in ``data`` (with ``text`` set to 'null'); plain text is
    stored in ``text``.  When no file matches, ``data`` is set to '' and 0
    is returned.
    """
    matches = glob.glob(str(directory) + '/*.' + str(suffix))
    if not matches:
        part.set('data', '')
        return 0
    size = 0
    for filename in matches:
        size += os.path.getsize(filename)
        if DEBUGMODE:
            print('full filepath is %s' % filename)
        if is_binary:
            with open(filename, 'rb') as handle:
                encoded = base64.b64encode(handle.read()).decode('UTF-8')
            part.set('data', encoded)
            if IMAGEDATA_DEBUG:  # special flag because it is a lot of text
                print('the IMAGE data is %s' % encoded)
            part.set('text', 'null')
        else:
            with open(filename, 'r') as handle:
                text = handle.read()
            part.set('text', text)
            if DEBUGMODE:
                print('the TEXT data is %s' % text)
    return size


for mms in mms_list:
    address = mms.get('address')
    date = mms.get('date')
    # Media belonging to a message, if any, is stored under
    # <input>.mms/<address>/<date>/, one sub-directory per part.
    MSG_MEDIA_FILES = MMS_FOLDER + '/' + address + '/' + date

    # Pop and re-add readable_date/contact_name so that _id is inserted
    # before them in the attribute order.  The contact name is replaced by
    # '(Unknown)'.
    mms.attrib.pop('contact_name')
    backupdate = mms.get('readable_date')
    mms.attrib.pop('readable_date')
    mms.set('_id', '0')
    mms.set('readable_date', backupdate)
    mms.set('contact_name', '(Unknown)')

    if not os.path.exists(MSG_MEDIA_FILES):
        # No media folder: keep the element as it is (BB10-style sms tag).
        mms_clone = mms
        newtree.append(mms_clone)
        continue

    mms_msg_subdirs = os.listdir(MSG_MEDIA_FILES)
    incoming = mms.get('type') == '1'
    original_m_id = mms.get('m_id')
    original_tr_id = mms.get('tr_id')

    # Build the replacement <mms> element.  Attributes are set in this exact
    # order because it determines their order in the written file.
    mms_clone = etree.SubElement(newtree, 'mms')
    for key, value in (
        ('date', mms.get('date')),
        ('rr', 'null'),
        ('sub', 'null'),
        ('timezone', 'null'),
        ('ct_t', 'application/vnd.wap.multipart.related'),
        ('read_status', 'null'),
        ('seen', '1'),
        ('msg_box', mms.get('type')),  # the sms 'type' is called msg_box on mms
        ('address', mms.get('address')),
        ('sub_cs', 'null'),
        ('resp_st', 'null' if incoming else '128'),
        ('retr_st', '128' if incoming else 'null'),
        ('delivery_r', 'null'),
        ('text_only', '0'),
        ('locked', '0'),
        ('exp', 'null'),
        ('m_id', str(uuid.uuid4()) if original_m_id is None else original_m_id),
        ('st', 'null'),
        ('retr_txt_cs', 'null'),
        ('retr_txt', 'null'),
        ('creator', 'com.google.android.apps.messaging'),
        ('date_sent', '0'),
        ('read', '1'),
        ('size', '0'),  # overwritten once the parts have been read
        ('rpt_a', 'null'),
        ('ct_cls', 'null'),
        ('pri', '129'),
        ('sub_id', '1'),
        ('tr_id', 'null' if original_tr_id is None else original_tr_id),
        ('resp_txt', 'null'),
        ('ct_l', 'null'),
        ('m_cls', 'personal'),
        ('d_rpt', '129'),
        ('v', '18'),
        ('_id', '0'),  # renumbered after sorting
        ('m_type', '128'),
        ('r_r_mod', '0'),
        ('readable_date', mms.get('readable_date')),
        ('contact_name', '(Unknown)'),
    ):
        mms_clone.set(key, value)
    if DEBUGMODE:
        print('mms string is %s ' % etree.tostring(mms_clone, encoding="unicode"))

    # First child: <parts>, starting with the SMIL part (seq="-1") whose
    # 'text' attribute is assembled while the media parts are visited.
    parts_tag = etree.SubElement(mms_clone, 'parts')
    part_seq = -1
    part_tag = etree.SubElement(parts_tag, 'part')
    for key, value in (
        ('seq', str(part_seq)),
        ('ct', 'application/smil'),
        ('name', 'smil.xml'),
        ('chset', '106'),
        ('cd', 'null'),
        ('fn', 'null'),
        ('cid', _LT + 'smil' + _GT),
        ('cl', 'smil.xml'),
        ('ctt_s', 'null'),
        ('ctt_t', 'null'),
    ):
        part_tag.set(key, value)

    part_tag_text_string_header = (_TEXT_OPEN + _LT + 'smil' + _GT
                                   + _LT + 'head' + _GT + _LT + 'layout' + _GT)
    part_tag_text_string_body = _LT + 'body' + _GT
    # 'total' is also what the end-of-run status line prints.
    total = len(mms_msg_subdirs) - 1
    part_tag_image_added_to_layout = False
    part_tag_text_added_to_layout = False
    total_msg_size = 0

    # Walk the part directories from last to first; i == 0 is the final pass.
    for i in range(total, -1, -1):
        part_seq = part_seq + 1
        part_tag_body_member = etree.SubElement(parts_tag, 'part')
        part_tag_body_member.set('seq', "0")
        mms_msg_subpath = MSG_MEDIA_FILES + '/' + mms_msg_subdirs[i]
        if DEBUGMODE:
            print('file will be at %s' % mms_msg_subpath)

        # Each part directory is expected to hold a filetype.txt naming the
        # content type of the part.
        with open(mms_msg_subpath + '/filetype.txt', 'r') as filetype_file:
            filetype_string = filetype_file.read()
        if DEBUGMODE:
            print('filetype is %s' % filetype_string)
        for key, value in (
            ('ct', filetype_string),
            ('name', 'null'),
            ('chset', 'null'),
            ('cd', 'null'),
            ('fn', 'null'),
        ):
            part_tag_body_member.set(key, value)

        # Split "<type>/<subtype>"; the subtype doubles as the file extension.
        is_binary = 'plain' not in filetype_string
        filetype = ''
        if is_binary:
            slash = filetype_string.find('/')
            filetype = filetype_string[0:slash]
            suffix = filetype_string[slash + 1:]
        else:
            suffix = 'txt'
        if 'x-vCard' in suffix:
            suffix = 'vcr'

        # Pick the SMIL media tag and region for this part, declaring each
        # layout region the first time it is needed.
        region = None
        if filetype_string == 'text/plain':
            media_tag = 'text'
            region = 'Text'
            body_member_data_string = 'text00000' + str(part_seq)
            if not part_tag_text_added_to_layout:
                part_tag_text_string_header += _smil_region(
                    ('id', 'Text'), ('top', '80%'), ('left', '0'),
                    ('height', '20%'), ('width', '100%'))
                part_tag_text_added_to_layout = True
        else:
            if filetype == 'image' or filetype == 'video':
                if not part_tag_image_added_to_layout:
                    # 80% height for pictures, 100% for videos.
                    height = '100%' if 'video' in filetype else '80%'
                    # The width attribute is actually appended here (the
                    # original statement discarded its result), and the
                    # region is closed only when one was opened, so no
                    # stray "/>" ends up in the layout.
                    part_tag_text_string_header += _smil_region(
                        ('id', 'Image'), ('fit', 'meet'), ('top', '0'),
                        ('left', '0'), ('height', height), ('width', '100%'))
                    part_tag_image_added_to_layout = True
            body_member_data_string = filetype + '00000' + str(part_seq)
            if 'image' in filetype:
                media_tag, region = 'img', 'Image'
            elif 'video' in filetype:
                media_tag, region = 'video', 'Image'
            elif 'audio' in filetype:
                media_tag = 'audio'
            else:  # pdf/vcard or anything else
                media_tag = 'ref'

        content_location = body_member_data_string + '.' + suffix
        member = (_LT + 'par dur="5000ms"' + _GT
                  + _LT + media_tag + ' ' + _smil_attr('src', content_location))
        if region is not None:
            member += ' ' + _smil_attr('region', region)
        member += '/' + _GT + _LT + '/par' + _GT
        part_tag_text_string_body = part_tag_text_string_body + member

        part_tag_body_member.set('cid', _LT + body_member_data_string + _GT)
        part_tag_body_member.set('cl', content_location)
        part_tag_body_member.set('ctt_s', 'null')
        part_tag_body_member.set('ctt_t', 'null')

        # On the final pass every part has been described, so the SMIL
        # document can be closed and stored on the seq="-1" part.
        if i == 0:
            part_tag_text_string_header += (_LT + '/layout' + _GT
                                            + _LT + '/head' + _GT)
            part_tag_text_string_body += (_LT + '/body' + _GT
                                          + _LT + '/smil' + _GT)
            final_string = part_tag_text_string_header + part_tag_text_string_body
            if DEBUGMODE:
                print("part header string before insertion is %s" % final_string)
            part_tag.set('text', final_string + _TEXT_CLOSE)

        total_msg_size += _attach_payload(
            part_tag_body_member, mms_msg_subpath, suffix, is_binary)
        if DEBUGMODE:
            print('part string looks like this %s' % etree.tostring(part_tag))

    # Now that every part has been read, record the combined payload size.
    mms_clone.set('size', str(total_msg_size))

    # Second child: <addrs>.  Type "137" marks the sender and "151" the
    # recipient; msg_box "1" means the message was received.
    addrs_tag = etree.SubElement(mms_clone, 'addrs')
    if mms_clone.get('msg_box') == "1":
        sender, recipient = mms_clone.get('address'), YOUR_PHONE_NUMBER
    else:
        sender, recipient = YOUR_PHONE_NUMBER, mms_clone.get('address')
    for addr_value, addr_type in ((sender, '137'), (recipient, '151')):
        addr_tag = etree.SubElement(addrs_tag, 'addr')
        addr_tag.set('address', addr_value)
        addr_tag.set('type', addr_type)
# Write the converted tree; this unsorted file is overwritten at the end.
newtree1 = etree.ElementTree(newtree)
newtree1.write(OUTPUT_FILE_NAME + '.xml',
               encoding="utf-8", pretty_print=True,
               xml_declaration=True, standalone="yes")

## BEGIN SORT
# sorting code belongs to zesk.
#
# https://stackoverflow.com/a/46128043
print('[*] Sorting : (' + OUTPUT_FILE_NAME + '-sorted.xml)')
sort(OUTPUT_FILE_NAME + '.xml', OUTPUT_FILE_NAME + '-sorted.xml', 'date')
## END SORT

print('[*] Substituting : ' + OUTPUT_FILE_NAME + '-sorted.xml')
## BEGIN REPLACEMENT OF SENTINEL STRINGS FOR PART SEQ="-1" TEXT HEADER
# The XML writer always double-quotes attributes and escapes & and ", so the
# single-quoted SMIL attribute is produced by textual substitution instead.
# Both files are opened explicitly as UTF-8, matching the encoding the tree
# was written with, rather than relying on the platform default.
with io.open(OUTPUT_FILE_NAME + '-sorted.xml', 'r', encoding='utf-8') as sorted_handle:
    filedata = sorted_handle.read()
# The sorted intermediate is no longer needed once it is in memory.
os.remove(OUTPUT_FILE_NAME + '-sorted.xml')

filedata = filedata.replace('"1337STARTQUOTE1337', '\'')
filedata = filedata.replace('1337ENDQUOTE1337"', '\'')
filedata = filedata.replace('LOLQUOATTHEGOATLOVESOATSLOL', '\"')
filedata = filedata.replace('GETTINGTOOAMPEDUPDEALINGWITHPYTHON', '&')
## FINISH SUBSTITUTION OF SENTINEL STRINGS

# Write the final file over the unsorted one.
with io.open(OUTPUT_FILE_NAME + '.xml', 'w', encoding='utf-8') as output_handle:
    output_handle.write(filedata)
######## tetrillard's original code | |
# mms_file.close(); | |
# filename = media.get('cl') | |
# rawdata = media.set('data').encode('base64') | |
# outfile = output + '/' + filename | |
## Duplicates handling | |
# i = 1 | |
# while os.path.isfile(outfile): | |
# dname = filename.split('.') | |
# dname.insert(-1, str(i)) | |
# outfile = output + '/' + '.'.join(dname) | |
# i = i+1 | |
# f = open(outfile, 'w') | |
# f.write(rawdata) | |
# f.close() | |
####### | |
# Final status report.
# NOTE(review): 'total' holds whatever the conversion loop last assigned to
# it, which does not look like a count of files created; confirm intent.
summary_line = '[*] Job done (%d files created, 1 [sorted] removed)' % total
print(summary_line)
output_line = '[*] Output file : ' + OUTPUT_FILE_NAME
print(output_line + '.xml')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
success! my boy prendy is the star of the moment!!

BLACKBERRY FOR LIFE.