Monday, December 14, 2015

Run Length Coding - Compression Algorithm - Java

Run length coding is used to compresses a stream of bits. It counts the number of bits consecutively and replaces them with their count.

Source:

public class RunLength { 
    private final static int R = 255;  
    
    public static String compress(String s){
        StringBuilder com = new StringBuilder();
        if(s.charAt(0)=='1')writeInt(com, 0);
        int i=0;
        
        boolean flag = false;
        while(i<s.length()){
            int j=i+1;int run=1;
            while(j<s.length() && run<=R){
                if(s.charAt(j-1)!=s.charAt(j))break;
                else {j++;run++;}
            } 
            if(run==R+1){flag = true;run=R;} 
            writeInt(com, run);
            if(flag==true){writeInt(com, 0);i=j-1;flag=false;}
            else i=j; 
        }
        return com.toString();
    }
    
    public static String expand(String s){
        int bit = 0;
        StringBuilder sb = new StringBuilder();
        int i=0;
        while(i<s.length()){
            int run = readInt(s, i);
            for(int ii=0;ii<run;++ii)sb.append(bit);
            bit=(bit==0)?1:0;
            i+=8; 
        }
        return sb.toString();
    }
    
    private static int readInt(String s, int i){
        int j = i+8;
        if(j>s.length())j = s.length()-1;
        String sub = s.substring(i, j);
        return Integer.parseInt(sub, 2);
    }
    
    private static void writeInt(StringBuilder compressed, int run){
        StringBuilder bin = new StringBuilder();
        String toBin = Integer.toBinaryString(run); 
        int fill = 8-toBin.length(); 
        while(fill-->0)bin.append(0);
        bin.append(toBin); 
        compressed.append(bin);
    }
    
    public static void main(String[] args){
        
        StringBuilder s = new StringBuilder();
        //Test Case #1
        for(int i=0;i<100;++i)s.append(0); 
        for(int i=0;i<100;++i)s.append(1); 
        
        //Representation as Run Length Coding
        String compressed = compress(s.toString());
        System.out.println("Uncompressed data requires "+s.length()+" bytes.\n"+expand(compressed));
        System.out.println("Compressed data representable by "+compressed.length()/8+" bytes\n"+compressed);
        
        //Test Case #2
        s = new StringBuilder();
        for(int i=0;i<300;++i)s.append(1);
        for(int i=0;i<300;++i)s.append(0);
        compressed = compress(s.toString());
        System.out.println("\n\nUncompressed data requires "+s.length()/8+" bytes.\n"+expand(compressed));
        System.out.println("Compressed data representable by "+compressed.length()/8+" bytes (Padding with 1 byte 0s after run length>255 and at the beginning as the string starts with 1 and not 0)\n"+compressed);

        //Test Case #3
        s = new StringBuilder();
        for(int i=0;i<300;++i)s.append(1);
        for(int i=0;i<300;++i)s.append(0);
        for(int i=0;i<10;++i)s.append(10);
        compressed = compress(s.toString());
        System.out.println("\n\nUncompressed data requires "+s.length()+" bytes.\n"+expand(compressed));
        System.out.println("Compressed data representable by "+compressed.length()/8+" bytes\n"+compressed);

    }
}

Output:

Uncompressed data requires 200 bytes.
00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111
Compressed data representable by 2 bytes
0110010001100100


Uncompressed data requires 75 bytes.
111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
Compressed data representable by 7 bytes (Padding with 1 byte 0s after run length>255 and at the beginning as the string starts with 1 and not 0)
00000000111111110000000000101101111111110000000000101101


Uncompressed data requires 620 bytes.
11111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000010101010101010101010
Compressed data representable by 27 bytes
000000001111111100000000001011011111111100000000001011010000000100000001000000010000000100000001000000010000000100000001000000010000000100000001000000010000000100000001000000010000000100000001000000010000000100000001 

No comments:

Post a Comment