Last active
November 23, 2021 05:52
-
-
Save aaronanderson/fc3a2452d554c1362f4f6d5bb4c815ea to your computer and use it in GitHub Desktop.
Apache POI HSMF Outlook Calendar/Appointment Metadata extraction including recurrence settings
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package outlook; | |
import java.nio.ByteBuffer; | |
import java.nio.ByteOrder; | |
import java.security.MessageDigest; | |
import java.time.format.DateTimeFormatter; | |
import java.util.GregorianCalendar; | |
import java.util.HashMap; | |
import java.util.HashSet; | |
import java.util.Map; | |
import java.util.Map.Entry; | |
import java.util.Optional; | |
import java.util.Set; | |
import java.util.Base64; | |
import org.apache.poi.hsmf.MAPIMessage; | |
import org.apache.poi.hsmf.MAPIMessage.MESSAGE_CLASS; | |
import org.apache.poi.hsmf.datatypes.ByteChunk; | |
import org.apache.poi.hsmf.datatypes.Chunk; | |
import org.apache.poi.hsmf.datatypes.MAPIProperty; | |
import org.apache.poi.hsmf.datatypes.NameIdChunks.PredefinedPropertySet; | |
import org.apache.poi.hsmf.datatypes.PropertiesChunk; | |
import org.apache.poi.hsmf.datatypes.PropertyValue; | |
import org.apache.poi.hsmf.datatypes.StringChunk; | |
public class OutlookMsgParser { | |
// Compiles with Apache POI 5.1.0.
// Drag and drop an Outlook email or calendar event into a folder, then use the Java code below to parse the item and
// extract metadata from it, such as the meeting date, participants, and recurrence settings.
// Standard Outlook named properties are listed here:
// https://docs.microsoft.com/en-us/office/client-developer/outlook/mapi/mapi-constants
// Install OutlookSpy (https://www.dimastr.com/outspy/home.htm) to view the contents of .msg files.
// This tool helps visualize which properties are available and commonly populated in Outlook .msg files.
public static void main(String[] args) { | |
String emailPath = "/tmp/my_outlook_appointment.msg"; | |
try (MAPIMessage msg = new MAPIMessage(emailPath)) { | |
String mid = null; | |
if (msg.getMainChunks().getMessageId() != null) { | |
mid = msg.getMainChunks().getMessageId().getValue(); | |
} else if (msg.getMainChunks().getSubmissionChunk() != null) { | |
mid = msg.getMainChunks().getSubmissionChunk().getSubmissionId(); | |
} | |
// digest the message id to create a more compact unique identifier | |
MessageDigest md = MessageDigest.getInstance("SHA-256"); | |
final String fmid = mid; | |
Optional.ofNullable(mid).ifPresent(c -> System.out.format("Message ID: %s\n", Base64.getEncoder().encodeToString(md.digest(fmid.getBytes())))); | |
Optional.ofNullable(msg.getMessageDate()).ifPresent(c -> System.out.format("Date: %s\n", formatDate((GregorianCalendar) c))); | |
Optional.ofNullable(msg.getSubject()).ifPresent(s -> System.out.format("Subject: %s\n", s)); | |
if (msg.getMessageClassEnum() == MESSAGE_CLASS.APPOINTMENT) { | |
Set<Integer> propertTags = new HashSet<>(); | |
// These properties use name ID redirection. Use Outlook Spy to find properties of interest in a .msg file. | |
//Lookup the Named Property section and confirm the GUID and ID. Lookup the actual property tag number in the name ID | |
//chunk. The tag ID will refer to a property or chunk ID in the file. | |
int allAttendeesTag = (int) msg.getNameIdChunks().getPropertyTag(PredefinedPropertySet.PSETID_APPOINTMENT.getClassID(), "AllAttendeesString", 0x8238); | |
int locationTag = (int) msg.getNameIdChunks().getPropertyTag(PredefinedPropertySet.PSETID_APPOINTMENT.getClassID(), "Location", 0x8208); | |
int appointmentStartTag = (int) msg.getNameIdChunks().getPropertyTag(PredefinedPropertySet.PSETID_APPOINTMENT.getClassID(), "AppointmentStartWhole", 0x820D); | |
int appointmentEndTag = (int) msg.getNameIdChunks().getPropertyTag(PredefinedPropertySet.PSETID_APPOINTMENT.getClassID(), "AppointmentEndWhole", 0x820E); | |
int clipStartTag = (int) msg.getNameIdChunks().getPropertyTag(PredefinedPropertySet.PSETID_APPOINTMENT.getClassID(), "ClipStart", 0x8235); | |
int clipEndTag = (int) msg.getNameIdChunks().getPropertyTag(PredefinedPropertySet.PSETID_APPOINTMENT.getClassID(), "ClipEnd", 0x8236); | |
int recurringTag = (int) msg.getNameIdChunks().getPropertyTag(PredefinedPropertySet.PSETID_APPOINTMENT.getClassID(), "Recurring", 0x8223); | |
int recurrenceTypeTag = (int) msg.getNameIdChunks().getPropertyTag(PredefinedPropertySet.PSETID_APPOINTMENT.getClassID(), "RecurrenceType", 0x8231); | |
int appointmentRecurTag = (int) msg.getNameIdChunks().getPropertyTag(PredefinedPropertySet.PSETID_APPOINTMENT.getClassID(), "AppointmentRecur", 0x8216); | |
int appointmentDurationTag = (int) msg.getNameIdChunks().getPropertyTag(PredefinedPropertySet.PSETID_APPOINTMENT.getClassID(), "AppointmentDuration", 0x8213); | |
propertTags.add(allAttendeesTag); | |
propertTags.add(locationTag); | |
propertTags.add(appointmentStartTag); | |
propertTags.add(appointmentEndTag); | |
propertTags.add(clipStartTag); | |
propertTags.add(clipEndTag); | |
propertTags.add(recurringTag); | |
propertTags.add(recurrenceTypeTag); | |
propertTags.add(appointmentRecurTag); | |
propertTags.add(appointmentDurationTag); | |
Map<Integer, Object> extendedProperties = extendedProperties(propertTags, msg); | |
// HSMFDump dump = new HSMFDump(new POIFSFileSystem(new File(emailPath))); | |
// dump.dump(System.out); | |
Optional.ofNullable(extendedProperties.get(allAttendeesTag)).ifPresent(c -> System.out.format("All Attendees: %s\n", c)); | |
Optional.ofNullable(extendedProperties.get(locationTag)).ifPresent(p -> System.out.format("Location: %s\n", p)); | |
Optional.ofNullable(extendedProperties.get(appointmentStartTag)).ifPresent(p -> System.out.format("Appointment Start: %s\n", formatDate((GregorianCalendar) p))); | |
Optional.ofNullable(extendedProperties.get(appointmentEndTag)).ifPresent(p -> System.out.format("Appointment End: %s\n", formatDate((GregorianCalendar) p))); | |
Optional.ofNullable(extendedProperties.get(clipStartTag)).ifPresent(p -> System.out.format("Clip StartTag: %s\n", formatDate((GregorianCalendar) p))); | |
Optional.ofNullable(extendedProperties.get(clipEndTag)).ifPresent(p -> System.out.format("Clip EndTag: %s\n", formatDate((GregorianCalendar) p))); | |
Optional.ofNullable(extendedProperties.get(recurringTag)).ifPresent(p -> System.out.format("Recurring: %s\n", p)); | |
Optional.ofNullable(extendedProperties.get(recurrenceTypeTag)).ifPresent(p -> System.out.format("Recurrence Type: %s - %s\n", p, RecurrenceType.recurrenceTypeOf((Integer) p))); | |
Optional.ofNullable(extendedProperties.get(appointmentRecurTag)).ifPresent(p -> { | |
// see | |
// https://interoperability.blob.core.windows.net/files/MS-OXOCAL/%5BMS-OXOCAL%5D.pdf | |
// page 34, 113 for the recurrence blob format | |
byte[] blobBytes = (byte[]) p; | |
// System.out.format("Appointment Recur - Blob: %s\n", Hex.encodeHexString(blob)); | |
ByteBuffer blob = ByteBuffer.wrap(blobBytes); | |
blob.order(ByteOrder.LITTLE_ENDIAN); | |
short readerVersion = blob.getShort(); | |
short writerVersion = blob.getShort(); | |
short recurFrequency = blob.getShort(); | |
short patternType = blob.getShort(); | |
short calendarType = blob.getShort(); | |
int firstDateTime = blob.getInt(); | |
int period = blob.getInt(); | |
int slidingFlag = blob.getInt(); | |
int patternTypeSpecific = blob.getInt(); | |
int endType = blob.getInt(); | |
int occurrenceCount = blob.getInt(); | |
int firstDOW = blob.getInt(); | |
int deletedInstanceCount = blob.getInt(); | |
int modifiedInstanceCount = blob.getInt(); | |
System.out.format("Appointment Recur - RecurFrequency: %s - %s\n", Integer.toHexString(recurFrequency), RecurFrequency.frequencyOf(recurFrequency)); | |
System.out.format("Appointment Recur - PatternType: %s - %s\n", Integer.toHexString(patternType), PatternType.patternOf(patternType)); | |
System.out.format("Appointment Recur - Period: %s\n", Integer.toHexString(period)); | |
System.out.format("Appointment Recur - PatternTypeSpecific: %s\n", formatPatternTypeSpecific(PatternType.patternOf(patternType), patternTypeSpecific)); | |
}); | |
Optional.ofNullable(extendedProperties.get(appointmentDurationTag)).ifPresent(p -> System.out.format("Appointment Duration: %s\n", p)); | |
} else { | |
Optional.ofNullable(msg.getDisplayFrom()).ifPresent(c -> System.out.format("From: %s\n", c)); | |
Optional.ofNullable(msg.getDisplayTo()).ifPresent(c -> System.out.format("To: %s\n", c)); | |
Optional.ofNullable(msg.getDisplayCC()).ifPresent(c -> System.out.format("CC: %s\n", c)); | |
} | |
} catch (Exception e) { | |
e.printStackTrace(); | |
} | |
} | |
/**
 * Values of the appointment "RecurrenceType" property, each paired with its numeric code.
 */
public enum RecurrenceType {
    /** Code 0: a single appointment. */
    rectypeNone(0x00000000),
    rectypeDaily(0x00000001),
    rectypeWeekly(0x00000002),
    rectypeMonthly(0x00000003),
    rectypeYearly(0x00000004);

    private final int recurrenceType;

    RecurrenceType(int value) {
        recurrenceType = value;
    }

    /** Returns the numeric code of this constant. */
    int recurrenceType() {
        return recurrenceType;
    }

    /**
     * Finds the constant carrying the given numeric code.
     *
     * @param recurrenceType numeric code to look up
     * @return the matching constant, or {@code null} when no constant has that code
     */
    public static RecurrenceType recurrenceTypeOf(int recurrenceType) {
        RecurrenceType[] candidates = values();
        for (int i = 0; i < candidates.length; i++) {
            if (candidates[i].recurrenceType == recurrenceType) {
                return candidates[i];
            }
        }
        return null;
    }
}
/**
 * Values of the RecurFrequency field of a recurrence blob, each paired with its numeric code.
 */
public enum RecurFrequency {
    Daily(0x200A),
    Weekly(0x200B),
    Monthly(0x200C),
    Yearly(0x200D);

    private final int frequency;

    RecurFrequency(int value) {
        frequency = value;
    }

    /** Returns the numeric code of this constant. */
    int frequency() {
        return frequency;
    }

    /**
     * Finds the constant carrying the given numeric code.
     *
     * @param frequency numeric code to look up
     * @return the matching constant, or {@code null} when no constant has that code
     */
    public static RecurFrequency frequencyOf(int frequency) {
        return java.util.stream.Stream.of(values())
                .filter(candidate -> candidate.frequency == frequency)
                .findFirst()
                .orElse(null);
    }
}
/**
 * Values of the PatternType field of a recurrence blob, each paired with its numeric code.
 */
public enum PatternType {
    Day(0x0000),
    Week(0x0001),
    Month(0x0002),
    MonthEnd(0x0004),
    MonthNth(0x0003);

    private final int pattern;

    PatternType(int value) {
        pattern = value;
    }

    /** Returns the numeric code of this constant. */
    int pattern() {
        return pattern;
    }

    /**
     * Finds the constant carrying the given numeric code.
     *
     * @param pattern numeric code to look up
     * @return the matching constant, or {@code null} when no constant has that code
     */
    public static PatternType patternOf(int pattern) {
        PatternType match = null;
        for (PatternType candidate : values()) {
            if (candidate.pattern == pattern) {
                match = candidate;
                break;
            }
        }
        return match;
    }
}
public static String formatPatternTypeSpecific(PatternType patternType, int patternTypeSpecific) { | |
StringBuilder sb = new StringBuilder(); | |
if (patternType == PatternType.Week || patternType == PatternType.MonthNth) { | |
checkWeekDayMask(patternTypeSpecific, 1, "Sunday", sb); | |
checkWeekDayMask(patternTypeSpecific, 2, "Monday", sb); | |
checkWeekDayMask(patternTypeSpecific, 3, "Tuesday", sb); | |
checkWeekDayMask(patternTypeSpecific, 4, "Wednesday", sb); | |
checkWeekDayMask(patternTypeSpecific, 5, "Thrusday", sb); | |
checkWeekDayMask(patternTypeSpecific, 6, "Friday", sb); | |
checkWeekDayMask(patternTypeSpecific, 7, "Saturday", sb); | |
} else if (patternType == PatternType.Month) { | |
sb.append(patternTypeSpecific); | |
} | |
return sb.toString(); | |
} | |
private static void checkWeekDayMask(int intRepresentation, int position, String day, StringBuilder buffer) { | |
boolean matches = ((intRepresentation) & (1 << (position - 1))) != 0; | |
if (matches) { | |
if (buffer.length() > 0) { | |
buffer.append(", "); | |
} | |
buffer.append(day); | |
} | |
} | |
public static Map<Integer, Object> extendedProperties(Set<Integer> propertTags, MAPIMessage msg) { | |
Map<Integer, Object> mappings = new HashMap<>(); | |
for (Chunk chunk : msg.getMainChunks().getChunks()) { | |
// System.out.format("Chunk - %s - %s - %s - %s\n", toHex(chunk.getChunkId()), | |
// chunk.getEntryName(), chunk.getType().getName(), chunk.getClass().getName()); | |
if (chunk instanceof PropertiesChunk) { | |
PropertiesChunk props = (PropertiesChunk) chunk; | |
for (Entry<MAPIProperty, PropertyValue> prop : props.getRawProperties().entrySet()) { | |
if (propertTags.contains(prop.getKey().id) && prop.getValue().getValue() != null) { | |
mappings.put(prop.getKey().id, prop.getValue().getValue()); | |
// System.out.format("Property %d - %s - %s - %s\n", prop.getKey().id, | |
// toHex(prop.getKey().id), prop.getValue().getActualType(), | |
// prop.getValue().getValue()); | |
} | |
} | |
} else { | |
if (propertTags.contains(chunk.getChunkId())) { | |
// System.out.format("Chunk - %s - %s - %s - %s\n", toHex(chunk.getChunkId()), | |
// chunk.getEntryName(), chunk.getType().getName(), chunk.getClass().getName()); | |
if (chunk instanceof StringChunk) { | |
StringChunk stringChunk = (StringChunk) chunk; | |
mappings.put(chunk.getChunkId(), stringChunk.getValue()); | |
} else if (chunk instanceof ByteChunk) { | |
ByteChunk byteChunk = (ByteChunk) chunk; | |
mappings.put(chunk.getChunkId(), byteChunk.getValue()); | |
} | |
} | |
} | |
} | |
return mappings; | |
} | |
public static String formatDate(GregorianCalendar gc) { | |
return DateTimeFormatter.ISO_OFFSET_DATE_TIME.format(gc.toZonedDateTime().toOffsetDateTime()); | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment