Logo Search packages:      
Sourcecode: cdk version File versions

FileConvertor.java

/* $RCSfile$
 * $Author: egonw $
 * $Date: 2008-02-17 10:06:51 +0100 (Sun, 17 Feb 2008) $
 * $Revision: 10151 $
 *
 * Copyright (C) 2001-2007  The Chemistry Development Kit (CDK) project
 *
 * Contact: cdk-devel@lists.sourceforge.net
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public License
 * as published by the Free Software Foundation; either version 2.1
 * of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 */
package org.openscience.cdk.applications;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStream;
import java.io.Writer;
import java.lang.reflect.Constructor;
import java.util.List;
import java.util.Properties;
import java.util.Vector;

import javax.vecmath.Vector2d;

import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.CommandLineParser;
import org.apache.commons.cli.HelpFormatter;
import org.apache.commons.cli.OptionBuilder;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.ParseException;
import org.apache.commons.cli.PosixParser;
import org.openscience.cdk.DefaultChemObjectBuilder;
import org.openscience.cdk.config.AtomTypeFactory;
import org.openscience.cdk.exception.CDKException;
import org.openscience.cdk.graph.rebond.RebondTool;
import org.openscience.cdk.interfaces.IAtom;
import org.openscience.cdk.interfaces.IAtomContainer;
import org.openscience.cdk.interfaces.IChemFile;
import org.openscience.cdk.interfaces.IChemModel;
import org.openscience.cdk.interfaces.IChemObjectBuilder;
import org.openscience.cdk.interfaces.IChemSequence;
import org.openscience.cdk.interfaces.ICrystal;
import org.openscience.cdk.interfaces.IMolecule;
import org.openscience.cdk.interfaces.IMoleculeSet;
import org.openscience.cdk.interfaces.IPseudoAtom;
import org.openscience.cdk.io.CDKSourceCodeWriter;
import org.openscience.cdk.io.HINWriter;
import org.openscience.cdk.io.IChemObjectIO;
import org.openscience.cdk.io.IChemObjectReader;
import org.openscience.cdk.io.IChemObjectWriter;
import org.openscience.cdk.io.MDLWriter;
import org.openscience.cdk.io.PDBWriter;
import org.openscience.cdk.io.ReaderFactory;
import org.openscience.cdk.io.SMILESWriter;
import org.openscience.cdk.io.SVGWriter;
import org.openscience.cdk.io.ShelXWriter;
import org.openscience.cdk.io.XYZWriter;
import org.openscience.cdk.io.listener.PropertiesListener;
import org.openscience.cdk.io.listener.TextGUIListener;
import org.openscience.cdk.io.program.GaussianInputWriter;
import org.openscience.cdk.io.setting.IOSetting;
import org.openscience.cdk.layout.StructureDiagramGenerator;
import org.openscience.cdk.tools.HydrogenAdder;
import org.openscience.cdk.tools.LoggingTool;
import org.openscience.cdk.tools.SaturationChecker;
import org.openscience.cdk.tools.manipulator.ChemFileManipulator;

/**
 * Program that converts a file from one format to a file with another format.
 * Supported formats are:
 *   input: CML, MDL MOL/SDF file, PDB, PMP, ShelX, SMILES, XYZ
 *  output: CML, MDL MOL/SDF file, PDB, ShelX, SMILES, SVG, XYZ,
 *          Gaussian Input, HyperChem HIN, CDK source code
 *
 * @cdk.module applications
 *
 * @author Egon Willighagen <egonw@sci.kun.nl>
 *
 * @cdk.require      java1.4+
 * @cdk.require      batik
 * @cdk.keyword      command line util
 * @cdk.keyword      file format
 * @cdk.builddepends commons-cli-1.0.jar
 */
public class FileConvertor {

/*
 *  This is a command line application           
 *  Do not convert System.out/err.println() 
 *  to logger statements
 */   

    /** Logger for diagnostics; user-facing messages go to System.out/err. */
    private LoggingTool logger;

    /* IO plumbing: builder for new chem objects plus the active reader and writer. */
    private IChemObjectBuilder builder;
    private IChemObjectReader cor;
    private IChemObjectWriter cow;

    /** Output format key; initialised to "cml" in the constructor, replaced by the -o option. */
    private String oformat;

    /* Listeners attached to every reader and writer that gets created (when non-null). */
    private TextGUIListener settingListener;
    private PropertiesListener propsListener;

    /** Level handed to the default TextGUIListener; convert() prints a blank line when it is positive. */
    private int level;

    /** Fully qualified names of CDK chem object classes, filled in by the constructor. */
    private Vector chemObjectNames = new Vector();

    /* The four processing switches below are set from the command line.
     * They are applied per atom container in convert(String); the 2D
     * cleanup is additionally applied in write(IMoleculeSet, String).
     * All of them start out as false (the Java default).
     */
    private boolean applyHAdding;
    private boolean applyHRemoval;
    private boolean apply2DCleanup;
    private boolean apply3DRebonding;

    public FileConvertor() {
        logger = new LoggingTool();
        LoggingTool.configureLog4j();
        logger.dumpSystemProperties();
        
        builder = DefaultChemObjectBuilder.getInstance();

        settingListener = new TextGUIListener(level);
        propsListener = null;

        this.level = 0;
        this.oformat = "cml";

        chemObjectNames.add("org.openscience.cdk.Molecule");
        chemObjectNames.add("org.openscience.cdk.MoleculeSet");
        chemObjectNames.add("org.openscience.cdk.Crystal");
        chemObjectNames.add("org.openscience.cdk.ChemModel");
        chemObjectNames.add("org.openscience.cdk.ChemSequence");
        chemObjectNames.add("org.openscience.cdk.ChemFile");
    }

    /**
     * Convert the file <code>ifilename</code>.
     *
     * @param ifilename name of input file
     */
00154     public boolean convert(String ifilename) {
        if (this.level > 0) System.out.println();
        boolean success = false;
        try {
            File file = new File(ifilename);
            if (file.isFile()) {
                cor = getChemObjectReader(file);
                if (cor == null) {
                    logger.warn("The format of the input file is not recognized or not supported.");
                    System.err.println("The format of the input file is not recognized or not supported.");
                    return false;
                }

                IChemFile content = (IChemFile)cor.read(builder.newChemFile());
                if (content == null) {
                    return false;
                }

                // apply modifications
                List containersList = ChemFileManipulator.getAllAtomContainers(content);
                AtomTypeFactory factory = AtomTypeFactory.getInstance(
                    "org/openscience/cdk/config/data/jmol_atomtypes.txt",
                    content.getBuilder()
                );
                for (int i=0; i<containersList.size(); i++) {
                  IAtomContainer container = (IAtomContainer)containersList.get(i);
                  java.util.Iterator atoms = container.atoms();
                    if (applyHAdding || applyHRemoval || apply2DCleanup || apply3DRebonding) {
                        while (atoms.hasNext()) {
                              IAtom atom = (IAtom)atoms.next();
                            if (!(atom instanceof IPseudoAtom)) {
                                try {
                                    factory.configure(atom);
                                } catch (CDKException exception) {
                                    logger.warn("Could not configure atom: ", exception.getMessage());
                                    logger.debug(exception);
                                }
                            }
                        }
                    }
                    if (applyHAdding) {
                        logger.info("Adding Hydrogens...");
                        HydrogenAdder adder = new HydrogenAdder("org.openscience.cdk.tools.ValencyChecker");
                        adder.addExplicitHydrogensToSatisfyValency(
                              builder.newMolecule(container)
                        );
                    } else if (applyHRemoval) {
                        for (int atomi=0; atomi<container.getAtomCount(); atomi++) {
                            if (container.getAtom(atomi).getSymbol().equals("H")) {
                                container.removeAtomAndConnectedElectronContainers(container.getAtom(atomi));
                            }
                        }
                    }
                    if (apply3DRebonding) {
                        logger.info("Creating bonds from 3D coordinates");
                        RebondTool rebonder = new RebondTool(2.0, 0.5, 0.5);
                        rebonder.rebond(container);
                        SaturationChecker satChecker = new SaturationChecker();
                        satChecker.saturate(container);
                    }
                    if (apply2DCleanup) {
                        logger.info("Creating 2D coordinates");
                        StructureDiagramGenerator sdg = new StructureDiagramGenerator();
                        try {
                            sdg.setMolecule(builder.newMolecule(container), false); // false -> don't make clone!
                            sdg.generateCoordinates(new Vector2d(0, 1));
                            container = sdg.getMolecule();
                            logger.debug("###########", container);
                        } catch (Exception exc) {
                            System.out.println("Could not generate coordinates for this molecule.");
                            logger.info("Could not generate coordinates for this molecule: ", exc.getMessage());
                            logger.debug(exc);
                            System.exit(1);
                        }
                    }
                }
                
                // create output file
                String ofilename = getOutputFileName(ifilename, this.oformat);
                FileWriter fileWriter = new FileWriter(new File(ofilename));
                cow = getChemObjectWriter(this.oformat, fileWriter);
                if (cow == null) {
                    logger.warn("Format ", oformat, " is an unsupported output format.");
                    System.err.println("Unsupported output format!");
                    return false;
                }
                write(content, ofilename);
                cow.close();

                success = true;
            } else {
                System.out.println("Skipping non-file.");
            }
        } catch (FileNotFoundException exception) {
            System.out.println("File " + ifilename + " does not exist!");
        } catch (Exception exception) {
            logger.debug(exception);
        }
        return success;
    }

    /**
     * actual program
     */
00258     public static void main(String[] args) {
        FileConvertor convertor = new FileConvertor();

        // process options
        String[] filesToConvert = convertor.parseCommandLineOptions(args);

        // do conversion(s)
        for (int i=0; i < filesToConvert.length; i++) {
            String inputFilename = filesToConvert[i];
            System.out.print("Converting " + inputFilename + " ... ");
            boolean success = convertor.convert(inputFilename);
            if (success) {
                System.out.println("succeeded!");
            } else {
                System.out.println("failed!");
            }
        }
    }

    // PRIVATE INTERNAL STUFF

    /**
     * Opens <code>file</code> and asks the ReaderFactory for a matching reader.
     * The configured setting and properties listeners are attached to the
     * reader when they are not null.
     *
     * @param  file the input file
     * @return a reader for the file, or <code>null</code> if the factory did
     *         not return one
     * @throws IOException if the file cannot be opened or the factory fails
     */
    private IChemObjectReader getChemObjectReader(File file) throws IOException {
        InputStream input = new FileInputStream(file);
        IChemObjectReader reader = null;
        try {
            reader = new ReaderFactory().createReader(input);
        } finally {
            if (reader == null) {
                // No reader was created (unknown format or an exception), so nothing
                // else holds the stream: close it here to avoid leaking the handle.
                input.close();
            }
        }
        if (reader != null) {
            if (settingListener != null) {
                reader.addChemObjectIOListener(settingListener);
            }
            if (propsListener != null) {
                reader.addChemObjectIOListener(propsListener);
            }
        }
        return reader;
    }

    /**
     * Creates the writer for the given output format on top of
     * <code>fileWriter</code> and attaches the configured IO listeners.
     *
     * @param  format     format key, compared case-insensitively (MOL, SMI,
     *                    SHELX, SVG, XYZ, PDB, GIN, CDK, HIN or CML)
     * @param  fileWriter destination the new writer writes to
     * @return the writer, or <code>null</code> if the format is unknown or
     *         the writer could not be instantiated (the reason is logged)
     */
    private IChemObjectWriter getChemObjectWriter(String format, Writer fileWriter) {
        IChemObjectWriter writer = null;
        try {
            if (format.equalsIgnoreCase("MOL")) {
                writer = new MDLWriter(fileWriter);
            } else if (format.equalsIgnoreCase("SMI")) {
                writer = new SMILESWriter(fileWriter);
            } else if (format.equalsIgnoreCase("SHELX")) {
                writer = new ShelXWriter(fileWriter);
            } else if (format.equalsIgnoreCase("SVG")) {
                writer = new SVGWriter(fileWriter);
            } else if (format.equalsIgnoreCase("XYZ")) {
                writer = new XYZWriter(fileWriter);
            } else if (format.equalsIgnoreCase("PDB")) {
                writer = new PDBWriter(fileWriter);
            } else if (format.equalsIgnoreCase("GIN")) {
                writer = new GaussianInputWriter(fileWriter);
            } else if (format.equalsIgnoreCase("CDK")) {
                writer = new CDKSourceCodeWriter(fileWriter);
            } else if (format.equalsIgnoreCase("HIN")) {
                writer = new HINWriter(fileWriter);
            } else if (format.equalsIgnoreCase("CML")) {
                // The CML writer is looked up by name at run time (presumably because it is
                // not on the compile-time class path of this module - TODO confirm). Use its
                // Writer constructor directly; no no-arg instance is needed for that.
                Class cmlWriterClass = this.getClass().getClassLoader().loadClass("org.openscience.cdk.io.CMLWriter");
                Constructor constructor = cmlWriterClass.getConstructor(new Class[]{Writer.class});
                writer = (IChemObjectWriter)constructor.newInstance(new Object[]{fileWriter});
            }

            if (writer != null) {
                logger.debug(format + " -> " + writer.getClass().getName());
                if (settingListener != null) {
                    writer.addChemObjectIOListener(settingListener);
                }
                if (propsListener != null) {
                    writer.addChemObjectIOListener(propsListener);
                }
            } else {
                logger.debug(format + " -> null");
            }
        } catch (Exception exception) {
            logger.error("Could not instantiate writer: ", exception.getMessage());
            logger.debug(exception);
        }
        return writer;
    }

    /**
     * Derives the output file name from the input file name: the directory
     * part and the last extension are dropped and the extension belonging to
     * the output format is appended.
     *
     * <p>Input names without any extension are accepted. For an unknown
     * output format nothing is appended after the dot.
     *
     * @param  inputFilename name (optionally with path) of the input file
     * @param  outputFormat  format key, compared case-insensitively
     * @return the output file name, without directory part
     */
    private String getOutputFileName(String inputFilename, String outputFormat) {
        // Strip the directory first, so that a dot in a directory name is never
        // mistaken for the start of the file extension.
        String baseName = inputFilename.substring(inputFilename.lastIndexOf(File.separatorChar) + 1);
        int dot = baseName.lastIndexOf('.');
        if (dot >= 0) {
            baseName = baseName.substring(0, dot);
        }

        // format key -> file extension
        final String[][] extensions = {
            {"CML",   "cml"},
            {"MOL",   "mol"},
            {"SMI",   "smi"},
            {"SHELX", "res"},
            {"SVG",   "svg"},
            {"XYZ",   "xyz"},
            {"PDB",   "pdb"},
            {"GIN",   "in"},
            {"CDK",   "java.fragment"},
            {"HIN",   "hin"}
        };

        String outputFilename = baseName + ".";
        for (int i = 0; i < extensions.length; i++) {
            if (outputFormat.equalsIgnoreCase(extensions[i][0])) {
                outputFilename = outputFilename + extensions[i][1];
                break;
            }
        }
        return outputFilename;
    }

    /**
     * Prints the usage message for the given options followed by the list of
     * supported output formats, then terminates the JVM with exit code 0.
     *
     * @param options the command line options to describe
     */
    private void printHelp(Options options) {
        HelpFormatter formatter = new HelpFormatter();
        formatter.printHelp("FileConvertor", options);
        
        // now report on the supported formats (keep in sync with getChemObjectWriter)
        System.out.println();
        System.out.println(" OUTPUT FORMATS:");
        System.out.println("  cdk    CDK source code fragment");
        System.out.println("  cml    Chemical Markup Language (the default)");
        System.out.println("  gin    Gaussian Input File");
        System.out.println("  hin    Hyperchem file");
        System.out.println("  mol    MDL molfile");
        System.out.println("  pdb    PDB");
        System.out.println("  shelx  ShelX");
        System.out.println("  smi    SMILES");
        System.out.println("  svg    Scalable Vector Graphics");
        System.out.println("  xyz    XYZ");
        
        System.exit(0);
    }
    
    /**
     * Parses the options in the command line arguments and returns
     * the index of the first non-option argument.
     */
00392     private String[] parseCommandLineOptions(String[] args) {

        Options options = new Options();
        options.addOption("h", "help", false, "give this help page");
        options.addOption(
            OptionBuilder.withLongOpt("question").
                          withDescription("level of IO questions [none|fewest|some|all]").
                          withValueSeparator('=').
                          hasArg().
                          create("q")
        );
        options.addOption(
            OptionBuilder.withLongOpt("outputformat").
                          withDescription("see below for supported formats (CML2 is default)").
                          withValueSeparator('=').
                          hasArg().
                          create("o")
        );
        options.addOption(
            OptionBuilder.withLongOpt("listoptions").
                          withDescription("lists the IO questions for the given format").
                          withValueSeparator('=').
                          hasArg().
                          create("l")
        );
        options.addOption(
            OptionBuilder.withLongOpt("properties").
                          withDescription("Java properties file with the IO settings").
                          withValueSeparator('=').
                          hasArg().
                          create("p")
        );
        options.addOption(
            OptionBuilder.withLongOpt("addHydrogens").
                          withDescription("add explicit hydrogens where missing").
                          create("a")
        );
        options.addOption(
            OptionBuilder.withLongOpt("removeHydrogens").
                          withDescription("remove all explicit hydrogens").
                          create("r")
        );
        options.addOption(
            OptionBuilder.withLongOpt("create2DCoordinates").
                          withDescription("create 2D coordinates using a layout algorithm").
                          create("2")
        );
        options.addOption(
            OptionBuilder.withLongOpt("rebondFrom3DCoordinates").
                          withDescription("calculate bonds from 3D coordinates").
                          create("b")
        );
        
        CommandLine line = null;
        try {
            CommandLineParser parser = new PosixParser();
            line = parser.parse(options, args);
        } catch (ParseException exception) {
            System.err.println("Unexpected exception: " + exception.toString());
        }
    
        if (line.hasOption("o")) {
            this.oformat = line.getOptionValue("o");
        }
        if (line.hasOption("q")) {
            String level = line.getOptionValue("q");
            if ("none".equals(level)) {
                settingListener = new TextGUIListener(0);
            } else if ("fewest".equals(level)) {
                settingListener = new TextGUIListener(1);
            } else if ("some".equals(level)) {
                settingListener = new TextGUIListener(2);
            } else if ("all".equals(level)) {
                settingListener = new TextGUIListener(3);
            } else {
                System.out.println("Only supported levels: none, fewest, some, all");
                System.exit(1);
            }
        }
        if (line.hasOption("l")) {
            listOptionsForIOClass(line.getOptionValue("l"));
            System.exit(0);
        }
        if (line.hasOption("p")) {
            String filename = line.getOptionValue("p");
            try {
                File file = new File(filename);
                Properties props = new Properties();
                props.load(new FileInputStream(file));
                propsListener = new PropertiesListener(props);
                settingListener = null;
            } catch (FileNotFoundException exception) {
                System.out.println("Cannot find properties file: " + filename);
                System.exit(1);
            } catch (IOException exception) {
                System.out.println("Cannot read properties file: " + filename);
                System.exit(1);
            }
        }
        if (line.hasOption("a")) {
            this.applyHAdding = true;
        }
        if (line.hasOption("r")) {
            this.applyHRemoval = true;
        }
        if (line.hasOption("2")) {
            this.apply2DCleanup = true;
        }
        if (line.hasOption("b")) {
            this.apply3DRebonding = true;
        }

        String[] filesToConvert = line.getArgs();
        
        if (filesToConvert.length == 0 || line.hasOption("h")) {
            printHelp(options);
        }
        
        return filesToConvert;
    }

    /**
     * Lists the IO settings of a reader or writer class. The class is looked
     * up in the <code>org.openscience.cdk.io</code> package, instantiated
     * with its no-arg constructor, and each of its settings is passed to a
     * TextGUIListener that has no input reader.
     *
     * @param ioClassName simple class name of the reader or writer
     */
    public void listOptionsForIOClass(String ioClassName) {
        logger.debug("listing IOSetting options");

        final String className = "org.openscience.cdk.io." + ioClassName;
        try {
            Class ioClazz = this.getClass().getClassLoader().loadClass(className);
            Object candidate = ioClazz.newInstance();

            // only CDK IO classes can report settings
            if (!(candidate instanceof IChemObjectIO)) {
                String message = "This class is not a CDK ChemObjectIO class";
                System.out.println(message);
                logger.error(message);
                return;
            }
            IOSetting[] settings = ((IChemObjectIO)candidate).getIOSettings();

            TextGUIListener listener = new TextGUIListener(4); // ask all questions
            listener.setInputReader(null); // but don't really ask them
            for (int idx = 0; idx < settings.length; idx++) {
                if (settings[idx] == null) {
                    String message = "This IOSetting is null";
                    System.out.println(message);
                    logger.warn(message);
                    continue;
                }
                listener.processIOSettingQuestion(settings[idx]);
            }
        } catch (ClassNotFoundException notFound) {
            String message = "This Reader/Writer does not exist: " + className;
            System.out.println(message);
            logger.error(message);
            logger.debug(notFound);
        } catch (InstantiationException notInstantiable) {
            String message = "Could not instantiate the class: " + className;
            System.out.println(message);
            logger.error(message);
            logger.debug(notInstantiable);
        } catch (Exception other) {
            System.out.println("An unknown exception occured: " + other.toString());
            logger.debug(other);
        }
    }

    /**
    * Since we do not know what kind of IChemObject the Writer supports,
    * and we want to output as much information as possible, use
    * the generalized mechanism below.
    */
00563     private void write(IChemFile chemFile, String outputFilename) throws IOException {
        if (cow.accepts(chemFile.getClass())) {
            // Can write ChemFile, do so
            try {
                cow.write(chemFile);
            } catch (CDKException e) {
                logger.error("Could not write ChemFile. FIXME: I should recurse!");
            }
        } else {
            logger.info("Cannot write ChemFile, recursing into ChemSequence's.");
            int count = chemFile.getChemSequenceCount();
            boolean needMoreFiles = (cow.accepts(IChemSequence.class)) && (count > 1);
            for (int i=0; i < count; i++) {
                if (needMoreFiles) {
                    cow.close(); // possibly closing empty file
                    String fname = outputFilename + "." + (i+1);
                    FileWriter fileWriter = new FileWriter(new File(fname));
                    cow = getChemObjectWriter(this.oformat, fileWriter);
                }
                write(chemFile.getChemSequence(i), outputFilename);
            }
        }
    }

    /**
     * Writes a ChemSequence with the current writer; if the writer throws a
     * CDKException, the ChemModels of the sequence are written one by one
     * instead. When the writer accepts ChemModels and there is more than
     * one, each model goes into its own file <code>outputFilename.N</code>.
     *
     * @param  sequence       the sequence to write
     * @param  outputFilename name of the primary output file
     * @throws IOException    if a numbered output file cannot be opened or a
     *                        writer cannot be closed
     */
    private void write(IChemSequence sequence, String outputFilename) throws IOException {
        boolean written;
        try {
            cow.write(sequence);
            written = true;
        } catch (CDKException exception) {
            // the writer cannot handle the sequence as a whole; descend below
            written = false;
        }
        if (written) {
            return;
        }

        final int modelCount = sequence.getChemModelCount();
        final boolean oneFilePerModel = (modelCount > 1) ? cow.accepts(IChemModel.class) : false;
        logger.info("Cannot write ChemSequence, recursing into ChemModel's.");
        int position = 0;
        while (position < modelCount) {
            if (oneFilePerModel) {
                cow.close(); // possibly closing empty file
                File target = new File(outputFilename + "." + (position + 1));
                cow = getChemObjectWriter(this.oformat, new FileWriter(target));
            }
            write(sequence.getChemModel(position), outputFilename);
            position++;
        }
    }

    /**
     * Writes a ChemModel with the current writer; if the writer throws a
     * CDKException, the model's crystal and molecule set (each when present)
     * are written instead.
     *
     * @param  cm             the model to write
     * @param  outputFilename name of the primary output file
     * @throws IOException    passed on from the nested write calls
     */
    private void write(IChemModel cm, String outputFilename) throws IOException {
        boolean written;
        try {
            cow.write(cm);
            written = true;
        } catch (CDKException exception) {
            // the writer cannot handle the model as a whole; try its parts below
            written = false;
        }
        if (written) {
            return;
        }

        logger.info("Cannot write ChemModel, trying Crystal.");
        final ICrystal crystalPart = cm.getCrystal();
        if (crystalPart != null) {
            write(crystalPart, outputFilename);
        }
        final IMoleculeSet moleculePart = cm.getMoleculeSet();
        if (moleculePart != null) {
            write(moleculePart, outputFilename);
        }
    }

    /**
     * Writes a Crystal with the current writer. A CDKException from the
     * writer is logged and not passed on.
     *
     * @param  c              the crystal to write
     * @param  outputFilename name of the primary output file (not used here)
     * @throws IOException    declared for symmetry with the other write methods
     */
    private void write(ICrystal c, String outputFilename) throws IOException {
        try {
            cow.write(c);
        } catch (CDKException exception) {
            logger.error("Cannot write Crystal: ", exception.getMessage());
            // same as for molecules: keep the full exception for debugging
            logger.debug(exception);
        }
    }

    /**
     * Writes a MoleculeSet with the current writer. When 2D cleanup was
     * requested, coordinates are generated for every molecule first; a
     * failing layout terminates the JVM with exit code 1. If the writer
     * throws a CDKException for the set, the molecules are written one by
     * one instead, each into its own file <code>outputFilename.N</code>
     * when more files are needed.
     *
     * @param  som            the molecule set to write
     * @param  outputFilename name of the primary output file
     * @throws IOException    if a numbered output file cannot be opened or a
     *                        writer cannot be closed
     */
    private void write(IMoleculeSet som, String outputFilename) throws IOException {
        try {
            if (apply2DCleanup) {
                logger.info("Creating 2D coordinates");
                java.util.Iterator mols = som.molecules();
                StructureDiagramGenerator sdg = new StructureDiagramGenerator();
                while (mols.hasNext()) {
                    IMolecule molecule = (IMolecule)mols.next();
                    try {
                        // false -> don't make clone! The generator is asked to work on the
                        // molecule itself, so the result is not fetched back from it.
                        sdg.setMolecule(molecule, false);
                        sdg.generateCoordinates(new Vector2d(0, 1));
                    } catch (Exception exception) {
                        System.out.println("Could not generate coordinates for this molecule.");
                        // log the reason before exiting, as convert() does
                        logger.info("Could not generate coordinates for this molecule: ", exception.getMessage());
                        logger.debug(exception);
                        System.exit(1);
                    }
                }
            }
            cow.write(som);
        } catch (CDKException exception) {
            int count = som.getMoleculeCount();
            // NOTE(review): the sibling write methods test accepts() for the child type
            // here; this one tests IMoleculeSet rather than IMolecule - confirm intent.
            boolean needMoreFiles = (cow.accepts(IMoleculeSet.class)) && (count > 1);
            logger.info("Cannot write MoleculeSet, recursing into Molecules's.");
            for (int i = 0; i < count; i++) {
                if (needMoreFiles) {
                    cow.close(); // possibly closing empty file
                    String fname = outputFilename + "." + (i+1);
                    FileWriter fileWriter = new FileWriter(new File(fname));
                    cow = getChemObjectWriter(this.oformat, fileWriter);
                }
                write(som.getMolecule(i), outputFilename);
            }
        }
    }

    /**
     * Writes a single molecule with the current writer. A CDKException from
     * the writer is logged (message at error level, exception at debug
     * level) and not passed on.
     *
     * @param  molecule       the molecule to write
     * @param  outputFilename name of the primary output file (not used here)
     * @throws IOException    declared for symmetry with the other write methods
     */
    private void write(IMolecule molecule, String outputFilename) throws IOException {
        try {
            cow.write(molecule);
        } catch (CDKException failure) {
            final String reason = failure.getMessage();
            logger.error("Cannot write molecule: ", reason);
            logger.debug(failure);
        }
    }
    
}




Generated by  Doxygen 1.6.0   Back to index