c#

string

.net-2.0

compression

I am newbie in .net. I am doing compression and decompression string in C#. There is a XML and I am converting in string and after that I am doing compression and decompression.There is no compilation error in my code except when I decompression my code and return my string, its returning only half of the XML.

Below is my code, please correct me where I am wrong.

Code:

class Program
{
    public static string Zip(string value)
    {
        //Transform string into byte[]  
        byte[] byteArray = new byte[value.Length];
        int indexBA = 0;
        foreach (char item in value.ToCharArray())
        {
            byteArray[indexBA++] = (byte)item;
        }

        //Prepare for compress
        System.IO.MemoryStream ms = new System.IO.MemoryStream();
        System.IO.Compression.GZipStream sw = new System.IO.Compression.GZipStream(ms, System.IO.Compression.CompressionMode.Compress);

        //Compress
        sw.Write(byteArray, 0, byteArray.Length);
        //Close, DO NOT FLUSH cause bytes will go missing...
        sw.Close();

        //Transform byte[] zip data to string
        byteArray = ms.ToArray();
        System.Text.StringBuilder sB = new System.Text.StringBuilder(byteArray.Length);
        foreach (byte item in byteArray)
        {
            sB.Append((char)item);
        }
        ms.Close();
        sw.Dispose();
        ms.Dispose();
        return sB.ToString();
    }

    public static string UnZip(string value)
    {
        //Transform string into byte[]
        byte[] byteArray = new byte[value.Length];
        int indexBA = 0;
        foreach (char item in value.ToCharArray())
        {
            byteArray[indexBA++] = (byte)item;
        }

        //Prepare for decompress
        System.IO.MemoryStream ms = new System.IO.MemoryStream(byteArray);
        System.IO.Compression.GZipStream sr = new System.IO.Compression.GZipStream(ms,
            System.IO.Compression.CompressionMode.Decompress);

        //Reset variable to collect uncompressed result
        byteArray = new byte[byteArray.Length];

        //Decompress
        int rByte = sr.Read(byteArray, 0, byteArray.Length);

        //Transform byte[] unzip data to string
        System.Text.StringBuilder sB = new System.Text.StringBuilder(rByte);
        //Read the number of bytes GZipStream red and do not a for each bytes in
        //resultByteArray;
        for (int i = 0; i < rByte; i++)
        {
            sB.Append((char)byteArray[i]);
        }
        sr.Close();
        ms.Close();
        sr.Dispose();
        ms.Dispose();
        return sB.ToString();
    }

    static void Main(string[] args)
    {
        XDocument doc = XDocument.Load(@"D:\RSP.xml");
        string val = doc.ToString(SaveOptions.DisableFormatting);
        val = Zip(val);
        val = UnZip(val);
    }
} 

My XML size is 63KB.

Solution 1

The code to compress/decompress a string

public static void CopyTo(Stream src, Stream dest) {
    byte[] bytes = new byte[4096];

    int cnt;

    while ((cnt = src.Read(bytes, 0, bytes.Length)) != 0) {
        dest.Write(bytes, 0, cnt);
    }
}

public static byte[] Zip(string str) {
    var bytes = Encoding.UTF8.GetBytes(str);

    using (var msi = new MemoryStream(bytes))
    using (var mso = new MemoryStream()) {
        using (var gs = new GZipStream(mso, CompressionMode.Compress)) {
            //msi.CopyTo(gs);
            CopyTo(msi, gs);
        }

        return mso.ToArray();
    }
}

public static string Unzip(byte[] bytes) {
    using (var msi = new MemoryStream(bytes))
    using (var mso = new MemoryStream()) {
        using (var gs = new GZipStream(msi, CompressionMode.Decompress)) {
            //gs.CopyTo(mso);
            CopyTo(gs, mso);
        }

        return Encoding.UTF8.GetString(mso.ToArray());
    }
}

static void Main(string[] args) {
    byte[] r1 = Zip("StringStringStringStringStringStringStringStringStringStringStringStringStringString");
    string r2 = Unzip(r1);
}

Remember that Zip returns a byte[], while Unzip returns a string. If you want a string from Zip you can Base64 encode it (for example by using Convert.ToBase64String(r1)) (the result of Zip is VERY binary! It isn't something you can print to the screen or write directly in an XML)

The version suggested is for .NET 2.0, for .NET 4.0 use the MemoryStream.CopyTo.

IMPORTANT: The compressed contents cannot be written to the output stream until the GZipStream knows that it has all of the input (i.e., to effectively compress it needs all of the data). You need to make sure that you Dispose() of the GZipStream before inspecting the output stream (e.g., mso.ToArray()). This is done with the using() { } block above. Note that the GZipStream is the innermost block and the contents are accessed outside of it. The same goes for decompressing: Dispose() of the GZipStream before attempting to access the data.

Solution 2

according to this snippet i use this code and it's working fine:

using System;
using System.IO;
using System.IO.Compression;
using System.Text;

namespace CompressString
{
    internal static class StringCompressor
    {
        /// <summary>
        /// Compresses the string.
        /// </summary>
        /// <param name="text">The text.</param>
        /// <returns></returns>
        public static string CompressString(string text)
        {
            byte[] buffer = Encoding.UTF8.GetBytes(text);
            var memoryStream = new MemoryStream();
            using (var gZipStream = new GZipStream(memoryStream, CompressionMode.Compress, true))
            {
                gZipStream.Write(buffer, 0, buffer.Length);
            }

            memoryStream.Position = 0;

            var compressedData = new byte[memoryStream.Length];
            memoryStream.Read(compressedData, 0, compressedData.Length);

            var gZipBuffer = new byte[compressedData.Length + 4];
            Buffer.BlockCopy(compressedData, 0, gZipBuffer, 4, compressedData.Length);
            Buffer.BlockCopy(BitConverter.GetBytes(buffer.Length), 0, gZipBuffer, 0, 4);
            return Convert.ToBase64String(gZipBuffer);
        }

        /// <summary>
        /// Decompresses the string.
        /// </summary>
        /// <param name="compressedText">The compressed text.</param>
        /// <returns></returns>
        public static string DecompressString(string compressedText)
        {
            byte[] gZipBuffer = Convert.FromBase64String(compressedText);
            using (var memoryStream = new MemoryStream())
            {
                int dataLength = BitConverter.ToInt32(gZipBuffer, 0);
                memoryStream.Write(gZipBuffer, 4, gZipBuffer.Length - 4);

                var buffer = new byte[dataLength];

                memoryStream.Position = 0;
                using (var gZipStream = new GZipStream(memoryStream, CompressionMode.Decompress))
                {
                    gZipStream.Read(buffer, 0, buffer.Length);
                }

                return Encoding.UTF8.GetString(buffer);
            }
        }
    }
}

Solution 3

With the advent of .NET 4.0 (and higher) with the Stream.CopyTo() methods, I thought I would post an updated approach.

I also think the below version is useful as a clear example of a self-contained class for compressing regular strings to Base64 encoded strings, and vice versa:

public static class StringCompression
{
    /// <summary>
    /// Compresses a string and returns a deflate compressed, Base64 encoded string.
    /// </summary>
    /// <param name="uncompressedString">String to compress</param>
    public static string Compress(string uncompressedString)
    {
        byte[] compressedBytes;

        using (var uncompressedStream = new MemoryStream(Encoding.UTF8.GetBytes(uncompressedString)))
        {
            using (var compressedStream = new MemoryStream())
            { 
                // setting the leaveOpen parameter to true to ensure that compressedStream will not be closed when compressorStream is disposed
                // this allows compressorStream to close and flush its buffers to compressedStream and guarantees that compressedStream.ToArray() can be called afterward
                // although MSDN documentation states that ToArray() can be called on a closed MemoryStream, I don't want to rely on that very odd behavior should it ever change
                using (var compressorStream = new DeflateStream(compressedStream, CompressionLevel.Fastest, true))
                {
                    uncompressedStream.CopyTo(compressorStream);
                }

                // call compressedStream.ToArray() after the enclosing DeflateStream has closed and flushed its buffer to compressedStream
                compressedBytes = compressedStream.ToArray();
            }
        }

        return Convert.ToBase64String(compressedBytes);
    }

    /// <summary>
    /// Decompresses a deflate compressed, Base64 encoded string and returns an uncompressed string.
    /// </summary>
    /// <param name="compressedString">String to decompress.</param>
    public static string Decompress(string compressedString)
    {
        byte[] decompressedBytes;

        var compressedStream = new MemoryStream(Convert.FromBase64String(compressedString));

        using (var decompressorStream = new DeflateStream(compressedStream, CompressionMode.Decompress))
        {
            using (var decompressedStream = new MemoryStream())
            {
                decompressorStream.CopyTo(decompressedStream);

                decompressedBytes = decompressedStream.ToArray();
            }
        }

        return Encoding.UTF8.GetString(decompressedBytes);
    }

Heres another approach using the extension methods technique to extend the String class to add string compression and decompression. You can drop the class below into an existing project and then use thusly:

var uncompressedString = "Hello World!";
var compressedString = uncompressedString.Compress();

and

var decompressedString = compressedString.Decompress();

To wit:

public static class Extensions
{
    /// <summary>
    /// Compresses a string and returns a deflate compressed, Base64 encoded string.
    /// </summary>
    /// <param name="uncompressedString">String to compress</param>
    public static string Compress(this string uncompressedString)
    {
        byte[] compressedBytes;

        using (var uncompressedStream = new MemoryStream(Encoding.UTF8.GetBytes(uncompressedString)))
        {
            using (var compressedStream = new MemoryStream())
            { 
                // setting the leaveOpen parameter to true to ensure that compressedStream will not be closed when compressorStream is disposed
                // this allows compressorStream to close and flush its buffers to compressedStream and guarantees that compressedStream.ToArray() can be called afterward
                // although MSDN documentation states that ToArray() can be called on a closed MemoryStream, I don't want to rely on that very odd behavior should it ever change
                using (var compressorStream = new DeflateStream(compressedStream, CompressionLevel.Fastest, true))
                {
                    uncompressedStream.CopyTo(compressorStream);
                }

                // call compressedStream.ToArray() after the enclosing DeflateStream has closed and flushed its buffer to compressedStream
                compressedBytes = compressedStream.ToArray();
            }
        }

        return Convert.ToBase64String(compressedBytes);
    }

    /// <summary>
    /// Decompresses a deflate compressed, Base64 encoded string and returns an uncompressed string.
    /// </summary>
    /// <param name="compressedString">String to decompress.</param>
    public static string Decompress(this string compressedString)
    {
        byte[] decompressedBytes;

        var compressedStream = new MemoryStream(Convert.FromBase64String(compressedString));

        using (var decompressorStream = new DeflateStream(compressedStream, CompressionMode.Decompress))
        {
            using (var decompressedStream = new MemoryStream())
            {
                decompressorStream.CopyTo(decompressedStream);

                decompressedBytes = decompressedStream.ToArray();
            }
        }

        return Encoding.UTF8.GetString(decompressedBytes);
    }

Solution 4

I like @fubo's answer the best but I think this is much more elegant.

This method is more compatible because it doesn't manually store the length up front.

Also I've exposed extensions to support compression for string to string, byte[] to byte[], and Stream to Stream.

public static class ZipExtensions
{
    public static string CompressToBase64(this string data)
    {
        return Convert.ToBase64String(Encoding.UTF8.GetBytes(data).Compress());
    }

    public static string DecompressFromBase64(this string data)
    {
        return Encoding.UTF8.GetString(Convert.FromBase64String(data).Decompress());
    }
    
    public static byte[] Compress(this byte[] data)
    {
        using (var sourceStream = new MemoryStream(data))
        using (var destinationStream = new MemoryStream())
        {
            sourceStream.CompressTo(destinationStream);
            return destinationStream.ToArray();
        }
    }

    public static byte[] Decompress(this byte[] data)
    {
        using (var sourceStream = new MemoryStream(data))
        using (var destinationStream = new MemoryStream())
        {
            sourceStream.DecompressTo(destinationStream);
            return destinationStream.ToArray();
        }
    }
    
    public static void CompressTo(this Stream stream, Stream outputStream)
    {
        using (var gZipStream = new GZipStream(outputStream, CompressionMode.Compress))
        {
            stream.CopyTo(gZipStream);
            gZipStream.Flush();
        }
    }

    public static void DecompressTo(this Stream stream, Stream outputStream)
    {
        using (var gZipStream = new GZipStream(stream, CompressionMode.Decompress))
        {
            gZipStream.CopyTo(outputStream);
        }
    }
}

Solution 5

This is an updated version for .NET 4.5 and newer using async/await and IEnumerables:

public static class CompressionExtensions
{
    public static async Task<IEnumerable<byte>> Zip(this object obj)
    {
        byte[] bytes = obj.Serialize();

        using (MemoryStream msi = new MemoryStream(bytes))
        using (MemoryStream mso = new MemoryStream())
        {
            using (var gs = new GZipStream(mso, CompressionMode.Compress))
                await msi.CopyToAsync(gs);

            return mso.ToArray().AsEnumerable();
        }
    }

    public static async Task<object> Unzip(this byte[] bytes)
    {
        using (MemoryStream msi = new MemoryStream(bytes))
        using (MemoryStream mso = new MemoryStream())
        {
            using (var gs = new GZipStream(msi, CompressionMode.Decompress))
            {
                // Sync example:
                //gs.CopyTo(mso);

                // Async way (take care of using async keyword on the method definition)
                await gs.CopyToAsync(mso);
            }

            return mso.ToArray().Deserialize();
        }
    }
}

public static class SerializerExtensions
{
    public static byte[] Serialize<T>(this T objectToWrite)
    {
        using (MemoryStream stream = new MemoryStream())
        {
            BinaryFormatter binaryFormatter = new BinaryFormatter();
            binaryFormatter.Serialize(stream, objectToWrite);

            return stream.GetBuffer();
        }
    }

    public static async Task<T> _Deserialize<T>(this byte[] arr)
    {
        using (MemoryStream stream = new MemoryStream())
        {
            BinaryFormatter binaryFormatter = new BinaryFormatter();
            await stream.WriteAsync(arr, 0, arr.Length);
            stream.Position = 0;

            return (T)binaryFormatter.Deserialize(stream);
        }
    }

    public static async Task<object> Deserialize(this byte[] arr)
    {
        object obj = await arr._Deserialize<object>();
        return obj;
    }
}

With this you can serialize everything BinaryFormatter supports, instead only of strings.

Edit:

In case, you need take care of Encoding, you could just use Convert.ToBase64String(byte[])...

Take a look at this answer if you need an example!

Solution 6

For those who still getting The magic number in GZip header is not correct. Make sure you are passing in a GZip stream. ERROR and if your string was zipped using php you'll need to do something like:

       public static string decodeDecompress(string originalReceivedSrc) {
        byte[] bytes = Convert.FromBase64String(originalReceivedSrc);

        using (var mem = new MemoryStream()) {
            //the trick is here
            mem.Write(new byte[] { 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00 }, 0, 8);
            mem.Write(bytes, 0, bytes.Length);

            mem.Position = 0;

            using (var gzip = new GZipStream(mem, CompressionMode.Decompress))
            using (var reader = new StreamReader(gzip)) {
                return reader.ReadToEnd();
                }
            }
        }

Solution 7

We can reduce code complexity by using StreamReader and StreamWriter rather than manually converting strings to byte arrays. Three streams is all you need:

    public static byte[] Zip(string uncompressed)
    {
        byte[] ret;
        using (var outputMemory = new MemoryStream())
        {
            using (var gz = new GZipStream(outputMemory, CompressionLevel.Optimal))
            {
                using (var sw = new StreamWriter(gz, Encoding.UTF8))
                {
                    sw.Write(uncompressed);
                }
            }
            ret = outputMemory.ToArray();
        }
        return ret;
    }

    public static string Unzip(byte[] compressed)
    {
        string ret = null;
        using (var inputMemory = new MemoryStream(compressed))
        {
            using (var gz = new GZipStream(inputMemory, CompressionMode.Decompress))
            {
                using (var sr = new StreamReader(gz, Encoding.UTF8))
                {
                    ret = sr.ReadToEnd();
                }
            }
        }
        return ret;
    }