-
-
Save Su-s/438be493ae692318c73e30367cbc5c2a to your computer and use it in GitHub Desktop.
Use pure C# to extract .tar and .tar.gz files
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using System; | |
using System.IO; | |
using System.IO.Compression; | |
using System.Text; | |
namespace TarExample | |
{ | |
/// <summary>
/// Minimal extractor for <c>.tar</c> and <c>.tar.gz</c> archives built only on the
/// base class library. Regular files and directories are materialised on disk;
/// every entry is confined to the requested output directory.
/// </summary>
public class Tar
{
    /// <summary>Size of a tar header record; entry data is padded to a multiple of it.</summary>
    private const int BlockSize = 512;

    /// <summary>Size of the scratch buffer used when copying or discarding entry data.</summary>
    private const int CopyBufferSize = 81920;

    /// <summary>
    /// Extracts a <i>.tar.gz</i> archive to the specified directory.
    /// </summary>
    /// <param name="filename">The <i>.tar.gz</i> to decompress and extract.</param>
    /// <param name="outputDir">Output directory to write the files.</param>
    public static void ExtractTarGz(string filename, string outputDir)
    {
        using (var stream = File.OpenRead(filename))
        {
            ExtractTarGz(stream, outputDir);
        }
    }

    /// <summary>
    /// Extracts a <i>.tar.gz</i> archive stream to the specified directory.
    /// </summary>
    /// <param name="stream">The <i>.tar.gz</i> to decompress and extract.</param>
    /// <param name="outputDir">Output directory to write the files.</param>
    /// <exception cref="ArgumentNullException"><paramref name="stream"/> or <paramref name="outputDir"/> is null.</exception>
    public static void ExtractTarGz(Stream stream, string outputDir)
    {
        if (stream == null)
        {
            throw new ArgumentNullException("stream");
        }

        // The decompressed data is consumed as a forward-only stream, so the
        // archive never has to be buffered in memory as a whole.
        using (var gzip = new GZipStream(stream, CompressionMode.Decompress))
        {
            ExtractTar(gzip, outputDir);
        }
    }

    /// <summary>
    /// Extracts a <c>tar</c> archive to the specified directory.
    /// </summary>
    /// <param name="filename">The <i>.tar</i> to extract.</param>
    /// <param name="outputDir">Output directory to write the files.</param>
    public static void ExtractTar(string filename, string outputDir)
    {
        using (var stream = File.OpenRead(filename))
        {
            ExtractTar(stream, outputDir);
        }
    }

    /// <summary>
    /// Extracts a <c>tar</c> archive to the specified directory.
    /// </summary>
    /// <param name="stream">The <i>.tar</i> to extract. It only needs to support reading.</param>
    /// <param name="outputDir">Output directory to write the files.</param>
    /// <exception cref="ArgumentNullException"><paramref name="stream"/> or <paramref name="outputDir"/> is null.</exception>
    /// <exception cref="EndOfStreamException">The archive ends in the middle of a header or of entry data.</exception>
    /// <exception cref="IOException">An entry name resolves to a location outside <paramref name="outputDir"/>.</exception>
    public static void ExtractTar(Stream stream, string outputDir)
    {
        if (stream == null)
        {
            throw new ArgumentNullException("stream");
        }

        if (outputDir == null)
        {
            throw new ArgumentNullException("outputDir");
        }

        string root = Path.GetFullPath(outputDir);
        var header = new byte[BlockSize];
        var scratch = new byte[CopyBufferSize];

        while (true)
        {
            // A single Read call may return fewer bytes than requested (this is
            // routine for GZipStream), so the header is read with a loop.
            int headerBytes = ReadBlock(stream, header, BlockSize);
            if (headerBytes == 0)
            {
                break; // Stream ended without the trailing zero blocks.
            }

            if (headerBytes < BlockSize)
            {
                throw new EndOfStreamException("The tar archive ends inside a header block.");
            }

            // ustar header layout: name at 0 (100 bytes), size at 124 (12 bytes, octal),
            // type flag at 156, magic at 257, path prefix at 345 (155 bytes).
            string name = ReadString(header, 0, 100);
            if (String.IsNullOrWhiteSpace(name))
            {
                break; // An all-zero header marks the end of the archive.
            }

            if (IsUstar(header))
            {
                // POSIX ustar stores the leading part of long paths separately.
                string prefix = ReadString(header, 345, 155);
                if (prefix.Length > 0)
                {
                    name = prefix + "/" + name;
                }
            }

            long size = ParseOctal(header, 124, 12);
            int padding = (int)((BlockSize - (size % BlockSize)) % BlockSize);
            bool isDirectory = header[156] == (byte)'5'
                || name.EndsWith("/", StringComparison.Ordinal);

            string target = ResolveEntryPath(root, name);

            if (isDirectory)
            {
                Directory.CreateDirectory(target);
                FakeSeekForward(stream, size + padding);
                continue;
            }

            string parent = Path.GetDirectoryName(target);
            if (!String.IsNullOrEmpty(parent))
            {
                Directory.CreateDirectory(parent);
            }

            // FileMode.Create truncates an existing file so no stale bytes
            // survive past the end of the freshly written content.
            using (var file = File.Open(target, FileMode.Create, FileAccess.Write))
            {
                CopyBytes(stream, file, size, scratch);
            }

            FakeSeekForward(stream, padding);
        }
    }

    /// <summary>Reads up to <paramref name="count"/> bytes, stopping early only at end of stream.</summary>
    /// <returns>The number of bytes actually placed in <paramref name="buffer"/>.</returns>
    private static int ReadBlock(Stream stream, byte[] buffer, int count)
    {
        int total = 0;
        while (total < count)
        {
            int read = stream.Read(buffer, total, count - total);
            if (read == 0)
            {
                break;
            }

            total += read;
        }

        return total;
    }

    /// <summary>Decodes a NUL-terminated header field as text.</summary>
    private static string ReadString(byte[] header, int offset, int length)
    {
        int end = Array.IndexOf(header, (byte)0, offset, length);
        int count = end < 0 ? length : end - offset;
        // UTF-8 is a superset of ASCII and keeps non-ASCII entry names intact.
        return Encoding.UTF8.GetString(header, offset, count);
    }

    /// <summary>Parses an octal numeric header field; an empty field counts as zero.</summary>
    private static long ParseOctal(byte[] header, int offset, int length)
    {
        string text = Encoding.UTF8.GetString(header, offset, length).Trim('\0').Trim();
        return text.Length == 0 ? 0 : Convert.ToInt64(text, 8);
    }

    /// <summary>Tells whether the header carries the POSIX <c>ustar\0</c> magic.</summary>
    private static bool IsUstar(byte[] header)
    {
        return header[257] == (byte)'u'
            && header[258] == (byte)'s'
            && header[259] == (byte)'t'
            && header[260] == (byte)'a'
            && header[261] == (byte)'r'
            && header[262] == 0;
    }

    /// <summary>
    /// Maps an entry name to an absolute path and rejects names (such as
    /// <c>../x</c> or absolute paths) that would land outside <paramref name="root"/>.
    /// </summary>
    private static string ResolveEntryPath(string root, string name)
    {
        string separator = Path.DirectorySeparatorChar.ToString();
        string rootWithSeparator = root.EndsWith(separator, StringComparison.Ordinal)
            ? root
            : root + separator;

        string target = Path.GetFullPath(Path.Combine(root, name));
        bool inside = target.StartsWith(rootWithSeparator, StringComparison.Ordinal)
            || String.Equals(target, root, StringComparison.Ordinal);
        if (!inside)
        {
            throw new IOException("Tar entry '" + name + "' would be extracted outside the output directory.");
        }

        return target;
    }

    /// <summary>Copies exactly <paramref name="count"/> bytes from one stream to another in chunks.</summary>
    private static void CopyBytes(Stream source, Stream destination, long count, byte[] scratch)
    {
        long remaining = count;
        while (remaining > 0)
        {
            int wanted = (int)Math.Min(scratch.Length, remaining);
            int read = source.Read(scratch, 0, wanted);
            if (read == 0)
            {
                throw new EndOfStreamException("The tar archive ends inside entry data.");
            }

            destination.Write(scratch, 0, read);
            remaining -= read;
        }
    }

    /// <summary>
    /// Advances the stream by <paramref name="offset"/> bytes, seeking when the
    /// stream supports it and reading-and-discarding otherwise.
    /// </summary>
    /// <exception cref="EndOfStreamException">Fewer than <paramref name="offset"/> bytes remain.</exception>
    private static void FakeSeekForward(Stream stream, long offset)
    {
        if (offset <= 0)
        {
            return;
        }

        if (stream.CanSeek)
        {
            // Seeking past the end would succeed silently and hide truncation.
            if (stream.Length - stream.Position < offset)
            {
                throw new EndOfStreamException();
            }

            stream.Seek(offset, SeekOrigin.Current);
            return;
        }

        var discard = new byte[(int)Math.Min(offset, CopyBufferSize)];
        long remaining = offset;
        while (remaining > 0)
        {
            int read = stream.Read(discard, 0, (int)Math.Min(discard.Length, remaining));
            if (read == 0)
            {
                throw new EndOfStreamException();
            }

            remaining -= read;
        }
    }
}
} |
Hello, I am trying to decompress a tar.gz file. Inside it there is one gz file, which contains one tar file, and that tar file contains multiple TSV files. When I try to run your code, I get an error on line 58 saying "The archive entry was compressed using an unsupported compression method." Any advice?
@naveenvermaemail85 There is probably nothing to be done here: this code only supports GZip (via "GZipStream").
Other compression formats include BZip2 and LZMA, and maybe more, so try looking for C# libraries that can decompress those; SharpCompress can also report which method was used.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Note: a single byte array has a maximum size; see:
https://stackoverflow.com/questions/3944320/maximum-length-of-byte/53967254