Wednesday, June 25, 2014

File Search Program in Java [Search Based on File Name (Regex) + Text Content in files][Multi-directory]

This program takes the input:

1. Directory to look in
2. Name to look for
3. Content to look for in text files

It returns the list of files that are relevant. Helper classes and notes:

1. PPrint is for displaying the Collection (result) with proper formatting.
2. TextFile is used to convert the documents being searched into an ArrayList of String (Words)
3. The static List<String> words stores the words to look for in text files (supplied by the user as command-line arguments).
4. Be careful not to treat a File object that represents a directory as a readable file; otherwise you'll get a [FileNotFoundException: Access Denied].
5. Rest of the code should be understandable by itself.

A sample run:

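Suppose we have a directory structure in which e:\SomeFolder contains abc.txt and a subfolder AnotherFolder, which in turn contains xyz.txt: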

And we run:

E:\>java Directory e:\SomeFolder .*xyz.* random

We'll get this output:

dirs: [e:\SomeFolder\AnotherFolder]

files: [
  e:\SomeFolder\abc.txt
  e:\SomeFolder\AnotherFolder\xyz.txt
]

//xyz.txt as it matches the pattern
//abc.txt as it has the string "random"

Code:

import java.util.regex.*;
import java.io.*;
import java.util.*;

/**
 * Pretty-printer for collections and arrays.
 *
 * Empty and single-element collections are rendered on one line; larger
 * collections are rendered with one indented element per line.
 */
class PPrint {
 /**
  * Formats a collection as a bracketed string.
  *
  * @param c the collection to format
  * @return "[]" when empty, "[elem]" for a single element, otherwise the
  *         elements on separate lines, each indented by two spaces, between
  *         "[" and a closing "]" on its own line
  */
 public static String pformat(Collection<?> c) {
  final int count = c.size();
  if(count == 0) {
   return "[]";
  }
  // Only collections with more than one element use the line-per-element layout.
  final boolean multiLine = count != 1;
  StringBuilder out = new StringBuilder("[");
  Iterator<?> it = c.iterator();
  while(it.hasNext()) {
   Object item = it.next();
   if(multiLine) {
    out.append("\n  ");
   }
   out.append(item);
  }
  if(multiLine) {
   out.append("\n");
  }
  out.append("]");
  return out.toString();
 }

 /**
  * Prints the formatted collection to standard output.
  *
  * @param c the collection to print
  */
 public static void pprint(Collection<?> c) {
  String text = pformat(c);
  System.out.println(text);
 }

 /**
  * Prints the formatted array to standard output.
  *
  * @param c the array to print
  */
 public static void pprint(Object[] c) {
  String text = pformat(Arrays.asList(c));
  System.out.println(text);
 }
}

/**
 * A list of strings built from the contents of a text file, obtained by
 * splitting the file's text on a regular expression (by default, on lines).
 */
class TextFile extends ArrayList<String> {
 /**
  * Reads a file as a single string, terminating every line with "\n".
  *
  * @param fileName path of the file to read
  * @return the file's text
  * @throws RuntimeException wrapping the IOException if the file cannot be
  *         opened or read
  */
 public static String read(String fileName) {
  StringBuilder sb = new StringBuilder();
  try {
   BufferedReader in = new BufferedReader(new FileReader(
     new File(fileName).getAbsoluteFile()));
   try {
    String s;
    while((s = in.readLine()) != null) {
     sb.append(s);
     sb.append("\n");
    }
   } finally {
    // Always release the file handle, even when reading fails.
    in.close();
   }
  } catch(IOException e) {
   throw new RuntimeException(e);
  }
  return sb.toString();
 }

 /**
  * Reads a file and splits its text by the given regular expression.
  *
  * @param fileName path of the file to read
  * @param splitter regular expression used to split the file's text
  * @throws RuntimeException if the file cannot be read
  */
 public TextFile(String fileName, String splitter) {
  super(Arrays.asList(read(fileName).split(splitter)));
  // split() leaves an empty String at the first position when the text
  // starts with a delimiter. It can also return no pieces at all when the
  // text consists only of delimiters (e.g. a file holding a single blank
  // line split on "\\W+"), so check for emptiness before looking at get(0).
  if(!isEmpty() && get(0).equals("")) remove(0);
 }

 /**
  * Reads a file and splits it into lines.
  *
  * @param dir path of the file to read
  * @throws RuntimeException if the file cannot be read
  */
 public TextFile(String dir) {
  this(dir, "\n");
 }

}
/**
 * Recursive file search: collects the files under a directory whose name
 * matches a regular expression or whose text contains one of the search
 * words held in {@link #words}.
 */
public final class Directory{
 /**
  * Result of a directory walk: the matching files and every sub-directory
  * encountered. Iterating over a TreeInfo iterates over its files.
  */
 public static class TreeInfo implements Iterable<File>{
  public List<File> files = new ArrayList<File>();
  public List<File> dirs = new ArrayList<File>();

  /** @return an iterator over the collected files (not the directories) */
  public Iterator<File> iterator(){
   return files.iterator();
  }

  /** Appends the files and directories of another result to this one. */
  void addAll(TreeInfo other){
   files.addAll(other.files);
   dirs.addAll(other.dirs);
  }

  /** @return the directories and files, each formatted by PPrint.pformat */
  public String toString(){
   return "dirs: "+PPrint.pformat(dirs) +
     "\n\nfiles: "+PPrint.pformat(files);
  }
 }

 /**
  * Walks the tree rooted at the given path.
  *
  * @param start path of the directory to search
  * @param regex regular expression that a file name must match entirely
  * @return the matching files and the sub-directories found
  */
 public static TreeInfo walk(String start, String regex){
  return recurseDirs(new File(start), regex);
 }

 /**
  * Walks the tree rooted at the given directory.
  *
  * @param start directory to search
  * @param regex regular expression that a file name must match entirely
  * @return the matching files and the sub-directories found
  */
 public static TreeInfo walk(File start, String regex){
  return recurseDirs(start, regex);
 }

 /**
  * Walks the tree rooted at the given directory, accepting every file name.
  *
  * @param start directory to search
  * @return all files and sub-directories found
  */
 public static TreeInfo walk(File start){
  return recurseDirs(start, ".*");
 }

 /**
  * Walks the tree rooted at the given path, accepting every file name.
  *
  * @param start path of the directory to search
  * @return all files and sub-directories found
  */
 public static TreeInfo walk(String start){
  return recurseDirs(new File(start), ".*");
 }

 /** Words to look for inside files; main() fills this from the command line. */
 static List<String> words = new ArrayList<String>();

 /**
  * Recursively collects sub-directories and matching files below startDir.
  * A non-directory entry is reported when its name matches the regex, or
  * when it is a regular file containing one of the search words.
  *
  * @param startDir directory to search
  * @param regex regular expression that a file name must match entirely
  * @return the collected result; empty when startDir cannot be listed
  * @throws java.util.regex.PatternSyntaxException if regex is invalid
  */
 static TreeInfo recurseDirs(File startDir, String regex){
  // Compile once instead of once per file as String.matches() would.
  return collect(startDir, Pattern.compile(regex));
 }

 /** Does the recursive walk with an already compiled file-name pattern. */
 private static TreeInfo collect(File dir, Pattern namePattern){
  TreeInfo result = new TreeInfo();
  // listFiles() returns null when dir is not a directory or cannot be
  // read; report nothing for it rather than failing the whole search.
  File[] entries = dir.listFiles();
  if(entries == null){
   return result;
  }
  for(File item : entries){
   if(item.isDirectory()){
    result.dirs.add(item);
    result.addAll(collect(item, namePattern));
   }else if(namePattern.matcher(item.getName()).matches()
     || containsSearchWord(item)){
    // The content is only scanned when the name did not already match.
    result.files.add(item);
   }
  }
  return result;
 }

 /** Tells whether item is a regular file containing at least one search word. */
 private static boolean containsSearchWord(File item){
  // Without search words nothing can match, so the file is not read at all.
  if(words.isEmpty() || !item.isFile()){
   return false;
  }
  TextFile tf = new TextFile(item.getAbsolutePath(), "\\W+");
  return !Collections.disjoint(tf, words);
 }

 /**
  * Command-line entry point.
  *
  * @param args args[0] is the directory to search, args[1] the file-name
  *        regex, and every further argument a word to look for in files;
  *        with fewer than two arguments a usage message is printed
  */
 public static void main(String[] args){
    if(args.length<2){
     System.out.println("Usage: java Directory <dir to search in> <regex pattern to match in file names> <word to look for in files> <more words> <more...>");
    }else{
     for(int i=2;i<args.length;++i){
      words.add(args[i]);
     }
     System.out.println(walk(args[0], args[1]));
    }
 }
}


No comments:

Post a Comment