Welcome to the Java Programming Forums


The professional, friendly Java community. 21,500 members and growing!


The Java Programming Forums are a community of Java programmers from all around the world. Our members have a wide range of skills, and they all have one thing in common: a passion to learn and code Java. We invite everyone, from beginner Java programmers right through to Java professionals, to post here and share their knowledge. Become a part of the community, help others, expand your knowledge of Java and enjoy talking with like-minded people. Registration is quick and, best of all, free. We look forward to meeting you.


>> REGISTER NOW TO START POSTING


Members have full access to the forums. Advertisements are removed for registered users.

Results 1 to 4 of 4

Thread: parseallformat

  1. #1
    Junior Member
    Join Date
    Dec 2013
    Posts
    3
    Thanks
    0
    Thanked 0 Times in 0 Posts

    Default parseallformat

    package com.directoryfiles;

    import java.sql.SQLOutput;
    import java.util.Scanner;

    import org.apache.tika.Tika;
    import org.apache.tika.metadata.Metadata;
    import org.apache.tika.parser.AutoDetectParser;
    import org.apache.tika.parser.Parser;
    import org.apache.tika.parser.ParseContext;
    import org.apache.tika.parser.image.ImageMetadataExtracto r;
    import org.apache.tika.parser.jpeg.JpegParser;
    import org.apache.tika.sax.BodyContentHandler;

    import org.xml.sax.ContentHandler;
    import org.xml.sax.helpers.DefaultHandler;
    import org.apache.commons.io.*;
    import java.io.*;
    /**
     * Extracts body text and metadata from a file with Apache Tika.
     *
     * <p>{@link #parseDoc(String)} handles general documents through
     * {@link AutoDetectParser}; {@link #imageExtract(String)} reads JPEG metadata
     * through {@link ImageMetadataExtractor}. Results are kept in the instance
     * fields, so an instance holds the state of its most recent call only.
     *
     * <p>Created 12/27/13 by tr01.
     */
    public class ExtractText {

        /** Collects the body text written by the parser in {@link #parseDoc(String)}. */
        ContentHandler textHandler;
        /** Metadata populated by the parser in {@link #parseDoc(String)}. */
        Metadata metadata;
        /** Parser created by {@link #parseDoc(String)} for non-image files. */
        AutoDetectParser parser;
        /** File object built from the path last passed to {@link #parseDoc(String)}. */
        File fileName;
        /** Path last passed to {@link #parseDoc(String)}. */
        String resourceLocation;
        /** Not assigned by any method of this class; retained for compatibility. */
        JpegParser jpegParserObj;
        /** Not assigned by any method of this class; retained for compatibility. */
        ParseContext context;
        /** Not assigned by any method of this class; streams are scoped to the method that opens them. */
        InputStream input;
        /** Tika facade created by {@link #parseDoc(String)} and {@link #imageExtract(String)}. */
        Tika tikaObj;
        /** Extractor created by {@link #imageExtract(String)}. */
        ImageMetadataExtractor imgObj;

        /**
         * Parses the file at {@code resourceLocation} and stores the extracted text
         * in {@link #textHandler} and its metadata in {@link #metadata}.
         *
         * <p>Files whose name ends with {@code .jpeg}, {@code .png} or {@code .jpg}
         * are opened but not parsed. Any exception is printed to standard error and
         * not rethrown; in that case {@link #createAllFiles()} is not called.
         *
         * @param resourceLocation path of the file to parse
         */
        void parseDoc(String resourceLocation) {
            this.resourceLocation = resourceLocation;
            try {
                fileName = new File(resourceLocation);
                tikaObj = new Tika();

                // try-with-resources closes the stream even when parsing throws.
                try (InputStream stream = new FileInputStream(fileName)) {
                    textHandler = new BodyContentHandler(writeLimitFor(fileName));
                    metadata = new Metadata();

                    if (!isImageName(fileName.getName())) {
                        parser = new AutoDetectParser();
                        parser.parse(stream, textHandler, metadata);
                    }
                }

                // displayMetaDataContent();
                createAllFiles();
            } catch (Exception e) {
                e.printStackTrace();
            }
        }

        /**
         * Computes the character limit handed to {@link BodyContentHandler}:
         * ten times the file length for {@code .zip} files, the file length plus
         * one otherwise.
         *
         * <p>The arithmetic is done in {@code long} and clamped to
         * {@link Integer#MAX_VALUE}, so large files cannot wrap around to a
         * negative or truncated limit.
         */
        private static int writeLimitFor(File file) {
            long length = file.length();
            long limit = file.getName().endsWith(".zip") ? length * 10 : length + 1;
            return (int) Math.min(limit, (long) Integer.MAX_VALUE);
        }

        /** Returns whether {@code name} ends with one of the image extensions skipped by {@link #parseDoc(String)}. */
        private static boolean isImageName(String name) {
            return name.endsWith(".jpeg") || name.endsWith(".png") || name.endsWith(".jpg");
        }

        /**
         * Detects the MIME type of the given file, records it as the content type
         * of a fresh {@link Metadata} object, and runs
         * {@link ImageMetadataExtractor#parseJpeg(File)} on the file.
         *
         * <p>The populated metadata is reachable only through {@link #imgObj}'s
         * extractor; the {@link #metadata} field is not modified. Any exception is
         * printed to standard error and not rethrown.
         *
         * @param fileName path of the image file
         */
        public void imageExtract(String fileName) {
            try {
                tikaObj = new Tika();
                File imageFile = new File(fileName);
                Metadata imageMetadata = new Metadata();

                // The stream is only needed for type detection; close it right after.
                try (InputStream stream = new FileInputStream(imageFile)) {
                    String mimeType = tikaObj.detect(stream);
                    imageMetadata.set(Metadata.CONTENT_TYPE, mimeType);
                }

                imgObj = new ImageMetadataExtractor(imageMetadata);
                imgObj.parseJpeg(imageFile);
                // displayMetaDataContent();
            } catch (Exception e) {
                e.printStackTrace();
            }
        }

        /**
         * Prints the file name, the {@code title} and {@code Author} metadata
         * entries, and the extracted text to standard output.
         *
         * <p>Reads {@link #metadata} and {@link #textHandler}, which are only
         * assigned by {@link #parseDoc(String)}.
         */
        public void displayMetaDataContent() {
            System.out.println("Tika Parser starts……\n");
            System.out.println("file name: " + resourceLocation);
            System.out.println("Title:" + metadata.get("title"));
            System.out.println("Author: " + metadata.get("Author"));

            System.out.println("content: " + textHandler.toString());

            System.out.println("Tika Parser stops……");
        }

        /**
         * Currently does nothing: the export of the extracted text to a
         * {@code .txt} file next to the source file is disabled.
         */
        public void createAllFiles() {
            // Disabled export, kept for reference:
            // FileUtils.writeStringToFile(new File(fileName.getPath() + ".txt"), textHandler.toString());
        }
    }

  2. #2
    Junior Member
    Join Date
    Dec 2013
    Posts
    3
    Thanks
    0
    Thanked 0 Times in 0 Posts

    Default parseallformat2

    /**
     * Builds a tab-separated listing of the files under a directory tree and
     * appends it to a fixed result file.
     */
    public class FileOperation {

        /** Directory last resolved by {@link #getFileNames(String)}. */
        File directory;
        /** Not assigned by any method of this class; retained for compatibility. */
        File[] fileNames;
        /** Not assigned by any method of this class; writers are scoped to {@link #writeTOFile(String)}. */
        FileWriter fw;
        /** Not assigned by any method of this class; writers are scoped to {@link #writeTOFile(String)}. */
        BufferedWriter bw;
        /** Listing accumulated by {@link #createFilesInfo(File)}; it is never cleared, so repeated calls append. */
        StringBuffer sb = new StringBuffer();
        /** Text extractor created in the constructor; its use in {@link #createFilesInfo(File)} is disabled. */
        ExtractText extTextObj;

        /** Creates the operation together with its {@link ExtractText} helper. */
        public FileOperation() {
            extTextObj = new ExtractText();
        }

        /**
         * Wraps {@code directoryName} in a {@link File}, remembers it in
         * {@link #directory}, and returns it.
         *
         * @param directoryName path of the directory to list
         * @return the {@code File} for that path
         */
        public File getFileNames(String directoryName) {
            // System.out.println(directoryName);
            directory = new File(directoryName);
            return directory;
        }

        /**
         * Recursively appends one {@code name<TAB>path<NEWLINE>} line per regular
         * entry under {@code directory} to {@link #sb}.
         *
         * <p>If {@code directory} cannot be listed ({@link File#listFiles()}
         * returns {@code null}), nothing is appended for it.
         *
         * @param directory directory to walk
         * @return everything accumulated in {@link #sb} so far, including lines
         *         from earlier calls on this instance
         */
        public String createFilesInfo(File directory) {
            File[] entries = directory.listFiles();
            if (entries == null) {
                // Not a directory, or an I/O error occurred: skip instead of throwing NullPointerException.
                return sb.toString();
            }

            for (File entry : entries) {
                if (entry.isDirectory()) {
                    createFilesInfo(entry);
                } else {
                    sb.append(entry.getName()).append("\t").append(entry.getPath()).append("\n");
                    // extTextObj.parseDoc(entry.getPath());
                }
            }
            return sb.toString();
        }

        /**
         * Echoes {@code data} to standard output and appends it to the result
         * file. Any exception is printed to standard error and not rethrown.
         *
         * @param data text to append
         */
        public void writeTOFile(String data) {
            System.out.println("" + data);
            // NOTE(review): the space in "resource s" looks like a copy/paste artifact
            // (probably "resources") - confirm the intended directory before changing it.
            File f = new File("D:\\firstday\\project\\searchFiles\\resource s\\searchResult.txt");

            // try-with-resources closes both writers even when write() throws.
            try (FileWriter fileWriter = new FileWriter(f, true);
                 BufferedWriter writer = new BufferedWriter(fileWriter)) {
                writer.write(data);
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
    }

  3. #3
    Junior Member
    Join Date
    Dec 2013
    Posts
    3
    Thanks
    0
    Thanked 0 Times in 0 Posts

    Default parseallformat3

    import java.io.IOException;
    import java.util.Scanner;
    public class SearchMain {

    Scanner s;
    String directoryName;

    public SearchMain()
    {
    s=new Scanner(System.in);
    }

    public String getDirectoryName()
    {
    System.out.println("Enter Directory Name::");
    directoryName = s.next();
    return directoryName;
    }



    public static void main(String args[])
    {
    SearchMain searchObj = new SearchMain();
    FileOperation fileObj = new FileOperation();
    fileObj.writeTOFile(fileObj.createFilesInfo(fileOb j.getFileNames(searchObj.getDirectoryName())));

    // pf.parseDoc("D:\\firstday\\Documents\\solr ebook.pdf");
    // pf.parseDoc("D:\\firstday\\Documents\\xiaoxiaoHado op.pptx");
    //pf.parseDoc("D:\\firstday\\Documents\\images.png") ; //not supported using autodetectParser
    //pf.parseDoc("D:\\firstday\\Documents\\index.jpg"); //not supported using autodetectParser
    //pf.imageExtract("D:\\firstday\\Documents\\index.jp g"); //not supported using autodetectParser
    // pf.parseDoc("D:\\firstday\\Documents\\ArrayListDem o.class");
    //pf.parseDoc("D:\\firstday\\Documents\\Book12.xlsx" );
    // pf.parseDoc("D:\\firstday\\Documents\\books.csv");
    // pf.parseDoc("D:\\firstday\\Documents\\index.html") ;
    //pf.parseDoc("D:\\firstday\\Documents\\sample.rtf") ;
    //pf.parseDoc("D:\\firstday\\Documents\\Sample_proj. doc");
    //pf.parseDoc("D:\\firstday\\Documents\\Trainee[1].docx");
    //pf.parseDoc("D:\\firstday\\Documents\\xmldoc.xml") ;
    //pf.parseDoc("D:\\firstday\\Documents\\commons-io-2.4-bin.zip"); //exception text limit
    //pf.parseDoc("D:\\firstday\\Documents\\post.jar");
    // pf.parseDoc("D:\\firstday\\Documents\\test1.zip");


    }
    }

  4. #4
    Super Moderator
    Join Date
    Jun 2013
    Location
    So. Maryland, USA
    Posts
    5,751
    My Mood
    Mellow
    Thanks
    233
    Thanked 711 Times in 697 Posts

    Default Re: parseallformat

    Read this FAQ for instructions on how to post code correctly and other useful tips for newcomers.

    Is there a question? What's the purpose of this thread?