Created
July 27, 2022 16:13
-
-
Save dkrusky/f0d652410412670bd35a5f3a4126a76a to your computer and use it in GitHub Desktop.
Pure C# class that extracts files from a .tar.gz archive held in memory. Expects the archive's contents as a byte array.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using System; | |
using System.Collections.Generic; | |
using System.Diagnostics; | |
using System.IO; | |
using System.IO.Compression; | |
using System.Text; | |
using System.Threading.Tasks; | |
/// <summary>
/// Extracts files from a gzip-compressed tar archive (<c>.tar.gz</c>) that is held entirely in memory.
/// </summary>
/// <remarks>
/// Regular files and (in full-path mode) directories are extracted. Links, device nodes and
/// pax extended headers are skipped. Long names are supported through the ustar <c>prefix</c>
/// field and GNU <c>L</c> entries; pax <c>path</c> records are not interpreted.
/// </remarks>
public class TarGzExtractor
{
    // A tar archive is a sequence of 512-byte blocks: one header block per entry,
    // followed by the entry data padded up to a multiple of 512 bytes.
    private const int BlockSize = 512;
    private const int NameLength = 100;      // entry name, at offset 0
    private const int SizeOffset = 124;      // entry size as an octal string
    private const int SizeLength = 12;
    private const int TypeFlagOffset = 156;  // single-character entry type
    private const int MagicOffset = 257;     // "ustar\0" in POSIX archives
    private const int PrefixOffset = 345;    // ustar path prefix
    private const int PrefixLength = 155;

    /// <summary>Directory that extracted entries are written beneath.</summary>
    public string SaveDirectory { get; set; }

    /// <summary>
    /// When <c>true</c>, the directory structure stored in the archive is recreated under
    /// <see cref="SaveDirectory"/>. When <c>false</c>, every file is written directly into
    /// <see cref="SaveDirectory"/> using only its file name, so entries that share a file
    /// name overwrite each other.
    /// </summary>
    public bool ExtractFullPath { get; set; }

    /// <summary>
    /// Optional case-insensitive substring an entry name must contain to be extracted.
    /// <c>null</c> or an empty string extracts everything.
    /// </summary>
    public string Filter { get; set; }

    /// <summary>
    /// The exception that stopped the most recent <see cref="DecompressAsync"/> call early,
    /// or <c>null</c> when the whole archive was processed.
    /// </summary>
    public Exception LastError { get; private set; }

    /// <summary>
    /// Extract a <c>tar.gz</c> archive to <see cref="SaveDirectory"/>.
    /// </summary>
    /// <param name="gzip">The byte array of the <i>.tar.gz</i> to extract.</param>
    /// <returns>
    /// The paths of the files that were written, each formed by combining
    /// <see cref="SaveDirectory"/> with the entry's relative path.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="gzip"/> is <c>null</c>.</exception>
    /// <remarks>
    /// Extraction is best-effort: if reading the tar data or writing a file fails, the files
    /// extracted so far are returned and the failure is stored in <see cref="LastError"/>.
    /// Errors raised while decompressing the gzip layer are not caught here.
    /// </remarks>
    public async Task<List<string>> DecompressAsync(byte[] gzip)
    {
        if (gzip == null)
        {
            throw new ArgumentNullException("gzip");
        }

        LastError = null;
        List<string> extracted = new List<string>();

        using (MemoryStream tar = new MemoryStream())
        {
            // Inflate the whole gzip payload into memory first.
            using (GZipStream inflater = new GZipStream(new MemoryStream(gzip), CompressionMode.Decompress))
            {
                await inflater.CopyToAsync(tar).ConfigureAwait(false);
            }

            tar.Seek(0, SeekOrigin.Begin);

            try
            {
                await ExtractEntriesAsync(tar, extracted).ConfigureAwait(false);
            }
            catch (Exception ex)
            {
                // Keep the original "return what we managed to extract" contract, but make
                // the failure observable instead of silently discarding it.
                LastError = ex;
                Trace.TraceError("TarGzExtractor: extraction stopped early: " + ex);
            }
        }

        return extracted;
    }

    // Walks the tar headers in the decompressed stream and writes matching entries to disk.
    private async Task ExtractEntriesAsync(MemoryStream tar, List<string> extracted)
    {
        if (SaveDirectory == null)
        {
            throw new InvalidOperationException("SaveDirectory must be set before extracting.");
        }

        string filter = Filter ?? string.Empty;
        string rootPrefix = GetRootPrefix(SaveDirectory);
        char separator = Path.DirectorySeparatorChar;
        byte[] header = new byte[BlockSize];
        string pendingLongName = null;

        // A short read means the archive ended (or is truncated) - nothing more to extract.
        while (tar.Read(header, 0, BlockSize) == BlockSize)
        {
            string rawName = ReadString(header, 0, NameLength);
            if (string.IsNullOrWhiteSpace(rawName))
            {
                // An all-zero header block marks the end of the archive.
                break;
            }

            long size = ParseOctal(header, SizeOffset, SizeLength);
            long dataStart = tar.Position;
            if (size < 0 || size > tar.Length - dataStart)
            {
                throw new InvalidDataException("Tar entry '" + rawName + "' declares more data than the archive contains.");
            }

            // Entry data is read straight from the backing buffer, so the stream can be
            // moved to the next header (data length rounded up to a whole block) right away.
            tar.Position = dataStart + ((size + BlockSize - 1) / BlockSize) * BlockSize;

            char typeFlag = (char)header[TypeFlagOffset];

            if (typeFlag == 'L')
            {
                // GNU long-name entry: its data is the real name of the next entry.
                pendingLongName = ReadString(tar.GetBuffer(), (int)dataStart, (int)size);
                continue;
            }

            if (!string.IsNullOrEmpty(pendingLongName))
            {
                rawName = pendingLongName;
            }
            else if (IsUstarHeader(header))
            {
                string prefix = ReadString(header, PrefixOffset, PrefixLength);
                if (prefix.Length > 0)
                {
                    rawName = prefix + "/" + rawName;
                }
            }

            pendingLongName = null;

            // Old-style archives mark directories only by a trailing slash.
            bool isDirectory = typeFlag == '5' || rawName.EndsWith("/", StringComparison.Ordinal);
            bool isFile = !isDirectory && (typeFlag == '0' || typeFlag == '\0' || typeFlag == '7');
            if (!isDirectory && !isFile)
            {
                // Links, device nodes, pax headers, etc. are not extracted.
                continue;
            }

            if (isDirectory && !ExtractFullPath)
            {
                continue;
            }

            // Normalise to the platform separator and strip leading/trailing separators so
            // absolute archive paths become relative to SaveDirectory.
            string relative = rawName.Replace('/', separator).Replace('\\', separator).Trim(separator);
            if (!ExtractFullPath)
            {
                relative = Path.GetFileName(relative);
            }

            if (relative.Length == 0)
            {
                continue;
            }

            if (filter.Length > 0 && relative.IndexOf(filter, StringComparison.OrdinalIgnoreCase) < 0)
            {
                continue;
            }

            string target = Path.Combine(SaveDirectory, relative);

            // Archive names are untrusted: refuse anything that resolves outside SaveDirectory
            // (e.g. "../x" or a rooted path).
            if (!Path.GetFullPath(target).StartsWith(rootPrefix, StringComparison.Ordinal))
            {
                continue;
            }

            if (isDirectory)
            {
                Directory.CreateDirectory(target);
                continue;
            }

            string parent = Path.GetDirectoryName(target);
            if (!string.IsNullOrEmpty(parent))
            {
                Directory.CreateDirectory(parent);
            }

            // FileMode.Create truncates an existing file so no stale bytes survive.
            using (FileStream output = new FileStream(target, FileMode.Create, FileAccess.Write))
            {
                await output.WriteAsync(tar.GetBuffer(), (int)dataStart, (int)size).ConfigureAwait(false);
            }

            extracted.Add(target);
        }
    }

    // Returns the absolute form of the save directory with exactly one trailing separator,
    // used as the prefix every extracted path must start with.
    private static string GetRootPrefix(string saveDirectory)
    {
        string full = Path.GetFullPath(saveDirectory.Length == 0 ? "." : saveDirectory);
        return full.TrimEnd(Path.DirectorySeparatorChar, Path.AltDirectorySeparatorChar) + Path.DirectorySeparatorChar;
    }

    // True when the header carries the POSIX "ustar\0" magic, i.e. the prefix field is valid.
    private static bool IsUstarHeader(byte[] header)
    {
        return header[MagicOffset] == (byte)'u'
            && header[MagicOffset + 1] == (byte)'s'
            && header[MagicOffset + 2] == (byte)'t'
            && header[MagicOffset + 3] == (byte)'a'
            && header[MagicOffset + 4] == (byte)'r'
            && header[MagicOffset + 5] == 0;
    }

    // Decodes a NUL-terminated (or full-width) text field as UTF-8.
    private static string ReadString(byte[] buffer, int offset, int maxLength)
    {
        int length = Array.IndexOf(buffer, (byte)0, offset, maxLength) - offset;
        if (length < 0)
        {
            length = maxLength;
        }

        return Encoding.UTF8.GetString(buffer, offset, length);
    }

    // Parses an octal numeric header field; an empty field is treated as zero.
    private static long ParseOctal(byte[] buffer, int offset, int length)
    {
        string text = ReadString(buffer, offset, length).Trim();
        return text.Length == 0 ? 0 : Convert.ToInt64(text, 8);
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment