Logo Search packages:      
Sourcecode: cdk version File versions  Download package

IChemFile org::openscience::cdk::io::PDBReader::readChemFile ( IChemFile  oFile  )  [inline, private]

Read a ChemFile from a file in PDB format. The molecules in the file are stored as BioPolymers in the ChemFile. The residues are the monomers of the BioPolymer, and their names are the concatenation of the residue, chain id, and the sequence number. Separate chains (denoted by TER records) are stored as separate BioPolymer molecules.

Connectivity information (CONECT records) is only read when the readConnect setting is enabled.

Returns:
The ChemFile that was read from the PDB file.

As HETATMs cannot be considered to belong to either a certain monomer or a strand, they are dealt with separately.

Definition at line 188 of file PDBReader.java.

References org::openscience::cdk::interfaces::IAtomContainer::addAtom(), org::openscience::cdk::protein::data::PDBPolymer::addAtom(), org::openscience::cdk::interfaces::IAtomContainerSet::addAtomContainer(), org::openscience::cdk::interfaces::IChemSequence::addChemModel(), org::openscience::cdk::interfaces::IChemFile::addChemSequence(), org::openscience::cdk::interfaces::IMoleculeSet::addMolecule(), org::openscience::cdk::protein::data::PDBPolymer::addStructure(), org::openscience::cdk::tools::LoggingTool::debug(), org::openscience::cdk::tools::LoggingTool::error(), org::openscience::cdk::interfaces::IAtomContainer::getAtomCount(), org::openscience::cdk::AtomContainer::getAtomCount(), org::openscience::cdk::interfaces::IChemObject::getBuilder(), org::openscience::cdk::protein::data::PDBAtom::getChainID(), org::openscience::cdk::protein::data::PDBAtom::getICode(), org::openscience::cdk::BioPolymer::getMonomer(), org::openscience::cdk::interfaces::IChemObject::getProperty(), org::openscience::cdk::protein::data::PDBAtom::getResName(), org::openscience::cdk::protein::data::PDBAtom::getResSeq(), org::openscience::cdk::protein::data::PDBAtom::getSerial(), org::openscience::cdk::BioPolymer::getStrand(), org::openscience::cdk::tools::LoggingTool::info(), org::openscience::cdk::io::setting::BooleanIOSetting::isSet(), org::openscience::cdk::interfaces::IChemObjectBuilder::newChemModel(), org::openscience::cdk::interfaces::IChemObjectBuilder::newChemSequence(), org::openscience::cdk::interfaces::IChemObjectBuilder::newMolecule(), org::openscience::cdk::interfaces::IChemObjectBuilder::newMoleculeSet(), readAtom(), org::openscience::cdk::AtomContainer::removeAllBonds(), org::openscience::cdk::protein::data::PDBMonomer::setChainID(), org::openscience::cdk::protein::data::PDBStructure::setEndChainID(), org::openscience::cdk::protein::data::PDBStructure::setEndInsertionCode(), org::openscience::cdk::protein::data::PDBStructure::setEndSequenceNumber(), 
org::openscience::cdk::protein::data::PDBAtom::setHetAtom(), org::openscience::cdk::protein::data::PDBMonomer::setICode(), org::openscience::cdk::interfaces::IChemObject::setID(), org::openscience::cdk::interfaces::IChemModel::setMoleculeSet(), org::openscience::cdk::Monomer::setMonomerName(), org::openscience::cdk::Monomer::setMonomerType(), org::openscience::cdk::interfaces::IChemObject::setProperty(), org::openscience::cdk::protein::data::PDBMonomer::setResSeq(), org::openscience::cdk::protein::data::PDBStructure::setStartChainID(), org::openscience::cdk::protein::data::PDBStructure::setStartInsertionCode(), org::openscience::cdk::protein::data::PDBStructure::setStartSequenceNumber(), org::openscience::cdk::interfaces::IStrand::setStrandName(), org::openscience::cdk::protein::data::PDBStructure::setStructureType(), and org::openscience::cdk::tools::LoggingTool::warn().

Referenced by read().

                                                            {
            /*
             * Reads PDB records from _oInput one line at a time and stores the
             * result in oFile.
             *
             * Dispatch is on the first six characters of each line:
             *   SEQRES            - flags the input as a protein structure, so that
             *                       later ATOM records go into the PDBPolymer.
             *   ATOM              - atom added to the polymer (with monomer and
             *                       strand) or, without SEQRES, to a plain molecule.
             *   HETATM            - atom added to the polymer without monomer/strand.
             *   TER               - advances the internal chain letter.
             *   END               - optionally rebonds and adds the current
             *                       structure to the molecule set.
             *   MODEL             - stores the current model (if it has atoms) and
             *                       starts a new one.
             *   REMARK / COMPND   - stored on oFile as COMMENT / TITLE properties.
             *   CONECT            - only when readConnect is set; adds bonds.
             *   HELIX/SHEET/TURN  - recorded as PDBStructure on the polymer.
             * All other records are ignored.
             *
             * Any exception raised while reading is logged and ends the read loop;
             * whatever was collected so far is still attached to oFile. _oInput is
             * closed before returning.
             *
             * @param oFile  the chem file to fill; also supplies the builder used
             *               to create sequences, models, sets and molecules
             * @return       the oFile instance that was passed in
             */

            // initialize all containers
            IChemSequence oSeq = oFile.getBuilder().newChemSequence();
            IChemModel oModel = oFile.getBuilder().newChemModel();
            IMoleculeSet oSet = oFile.getBuilder().newMoleculeSet();
            
            // some variables needed
            String cCol;
            PDBAtom oAtom;
            PDBPolymer oBP = new PDBPolymer();
            IMolecule molecularStructure = oFile.getBuilder().newMolecule();
            StringBuffer cResidue;
            String oObj;
            IMonomer oMonomer;
            String cRead = "";
            char chain = 'A'; // To ensure stringent name giving of monomers
            IStrand oStrand;
            int lineLength = 0;
            
            boolean isProteinStructure = false;
            
            // maps atom serial numbers to atoms
            atomNumberMap = new Hashtable();
            
            // do the reading of the Input            
            try {
                  do {
                        cRead = _oInput.readLine();
                        logger.debug("Read line: ", cRead);
                        if (cRead != null) {    
                              // length of the line as read, i.e. before any padding below
                              lineLength = cRead.length();
                              
                              if (lineLength < 80) {
                                    logger.warn("Line is not of the expected length 80!");
                              }
                              
                              // make sure the record name is 6 characters long
                              if (lineLength < 6) {
                                    cRead = cRead + "      ";
                              }
                              // check the first column to decide what to do
                              cCol = cRead.substring(0,6);
                              if ("SEQRES".equalsIgnoreCase(cCol)) {
                                    isProteinStructure = true;
                              } else if ("ATOM  ".equalsIgnoreCase(cCol)) {
                                    // read an atom record
                                    oAtom = readAtom(cRead, lineLength);
                                    
                                    if (isProteinStructure) {
                                          // construct a string describing the residue:
                                          // residue name + current chain letter + residue sequence number
                                          cResidue = new StringBuffer(8);
                                          oObj = oAtom.getResName();
                                          if (oObj != null) {
                                                cResidue.append(oObj.trim());
                                          }
                                          oObj = oAtom.getChainID();
                                          if (oObj != null) {
                                                // the running chain letter is used here, not the chain ID from the record
                                                cResidue.append(String.valueOf(chain));
                                          }
                                          oObj = oAtom.getResSeq();
                                          if (oObj != null) {
                                                cResidue.append(oObj.trim());
                                          }

                                          // search for an existing strand or create a new one.
                                          String strandName = oAtom.getChainID();
                                          if (strandName == null || strandName.length() == 0) {
                                                strandName = String.valueOf(chain);
                                          }
                                          oStrand = oBP.getStrand(strandName);
                                          if (oStrand == null) {
                                                oStrand = new PDBStrand();
                                                oStrand.setStrandName(strandName);
                                                oStrand.setID(String.valueOf(chain));
                                          }

                                          // search for an existing monomer or create a new one.
                                          // NOTE(review): the lookup uses the chain letter while the strand above is
                                          // looked up by strandName; these differ when the record's chain ID is not
                                          // the running letter - confirm this is intended.
                                          oMonomer = oBP.getMonomer(cResidue.toString(), String.valueOf(chain));
                                          if (oMonomer == null) {
                                                PDBMonomer monomer = new PDBMonomer();
                                                monomer.setMonomerName(cResidue.toString());
                                                monomer.setMonomerType(oAtom.getResName());
                                                monomer.setChainID(oAtom.getChainID());
                                                monomer.setICode(oAtom.getICode());
                                                monomer.setResSeq(oAtom.getResSeq());
                                                oMonomer = monomer;
                                          }

                                          // add the atom
                                          oBP.addAtom(oAtom, oMonomer, oStrand);
                                          // the serial number is only registered when CONECT records will be read
                                          if (readConnect.isSet() && atomNumberMap.put(new Integer(oAtom.getSerial()), oAtom) != null) {
                                                logger.warn("Duplicate serial ID found for atom: ", oAtom);
                                          }
                                    } else {
                                          molecularStructure.addAtom(oAtom);
                                    }
                                    logger.debug("Added ATOM: ", oAtom);
                                    
                                    /* As HETATMs cannot be considered to belong to either a certain monomer
                                     * or a strand, they are dealt with separately. */
                              } else if ("HETATM".equalsIgnoreCase(cCol)) {
                                    // read an atom record
                                    oAtom = readAtom(cRead, lineLength);
                                    oAtom.setHetAtom(true);
                                    oBP.addAtom(oAtom);
                                    if (atomNumberMap.put(new Integer(oAtom.getSerial()), oAtom) != null) {
                                          logger.warn("Duplicate serial ID found for atom: ", oAtom);
                                    }
                                    logger.debug("Added HETATM: ", oAtom);
                              } else if ("TER   ".equalsIgnoreCase(cCol)) {
                                    // start new strand                                   
                                    chain++;
                                    oStrand = new PDBStrand();
                                    oStrand.setStrandName(String.valueOf(chain));
                                    logger.debug("Added new STRAND");
                              } else if ("END   ".equalsIgnoreCase(cCol)) {
                                    atomNumberMap.clear();
                                    if (isProteinStructure) {
                                          // create bonds and finish the molecule
                                          if (useRebondTool.isSet()) {
                                                try {
                                                      if (!createBondsWithRebondTool(oBP)) {
                                                            // Get rid of all potentially created bonds.
                                                            logger.info("Bonds could not be created using the RebondTool when PDB file was read.");                                             
                                                            oBP.removeAllBonds();                                             
                                                      }
                                                } catch (Exception exception) {
                                                      // rebonding is best effort: log and keep the unbonded structure
                                                      logger.info("Bonds could not be created when PDB file was read.");
                                                      logger.debug(exception);
                                                }
                                          }
                                          oSet.addMolecule(oBP);
                                    } else {
                                          oSet.addMolecule(molecularStructure);
                                    }
                              } else if ("MODEL ".equalsIgnoreCase(cCol)) {
                                    // compared case-insensitively, like every other record name here
                                    // OK, start a new model and save the current one first *if* it contains atoms
                                    if (isProteinStructure) {
                                          if (oBP.getAtomCount() > 0) {
                                                // save the model
                                                oSet.addAtomContainer(oBP);
                                                oModel.setMoleculeSet(oSet);
                                                oSeq.addChemModel(oModel);
                                                // setup a new one
                                                oBP = new PDBPolymer();
                                                oModel = oFile.getBuilder().newChemModel();
                                                oSet = oFile.getBuilder().newMoleculeSet();                                   
                                          }
                                    } else {
                                          if (molecularStructure.getAtomCount() > 0) {
                                                // save the model
                                                oSet.addAtomContainer(molecularStructure);
                                                oModel.setMoleculeSet(oSet);
                                                oSeq.addChemModel(oModel);
                                                // setup a new one
                                                molecularStructure = oFile.getBuilder().newMolecule();
                                                oModel = oFile.getBuilder().newChemModel();
                                                oSet = oFile.getBuilder().newMoleculeSet();           
                                          }
                                    }
                              } else if ("REMARK".equalsIgnoreCase(cCol)) {                                 
                                    // REMARK text is accumulated, one line per record, in the COMMENT property
                                    Object comment = oFile.getProperty(CDKConstants.COMMENT);
                                    if (comment == null) {
                                          comment = "";
                                    }
                                    if (lineLength > 12) {
                                          comment = comment.toString() + cRead.substring(11).trim() + "\n";
                                          oFile.setProperty(CDKConstants.COMMENT, comment);
                                    } else {
                                          logger.warn("REMARK line found without any comment!");
                                    }
                              } else if ("COMPND".equalsIgnoreCase(cCol)) {                                 
                                    // guard the substring: a COMPND line shorter than 10 characters would
                                    // otherwise throw and abort reading of the whole file
                                    if (lineLength >= 10) {
                                          String title = cRead.substring(10).trim();
                                          oFile.setProperty(CDKConstants.TITLE, title);
                                    } else {
                                          logger.warn("COMPND line found without any title!");
                                    }
                              } 
                              
                              /*************************************************************
                               * Read connectivity information from CONECT records.
                               * Only covalent bonds are dealt with. Perhaps salt bridges
                               * should be dealt with in the same way..?
                               */
                              else if (readConnect.isSet() && "CONECT".equalsIgnoreCase(cCol))  {
                                    // Strings are immutable, so the trimmed copy must be kept; it makes the
                                    // length check below see through blank padding. cRead itself is left
                                    // untouched so that error logging still shows the line as read.
                                    String conectLine = cRead.trim();
                                    if (conectLine.length() < 16) {
                                          logger.debug("Skipping unexpected empty CONECT line! : ", cRead);
                                    } else {
                                          // Serial numbers are 5 characters wide: the source atom is in
                                          // columns 7-11 (0-based 6..11), followed by up to four bonded
                                          // atoms in columns 12-16, 17-21, 22-26 and 27-31.
                                          String bondAtom = conectLine.substring(6, 11).trim();
                                          int bondAtomNo = Integer.parseInt(bondAtom);
                                          
                                          for (int fieldStart = 11; fieldStart < 31 && fieldStart < conectLine.length(); fieldStart += 5) {
                                                // clamp so that a truncated last field cannot throw
                                                int fieldEnd = Math.min(fieldStart + 5, conectLine.length());
                                                String bondedAtom = conectLine.substring(fieldStart, fieldEnd).trim();
                                                int bondedAtomNo;
                                                try {
                                                      bondedAtomNo = Integer.parseInt(bondedAtom);
                                                } catch (NumberFormatException e) {
                                                      // blank or non-numeric field: no bond for this slot
                                                      bondedAtomNo = -1;
                                                }
                                                
                                                if (bondedAtomNo != -1) {
                                                      addBond(oBP, bondAtomNo, bondedAtomNo);
                                                      logger.warn("Bonded " + bondAtomNo + " with " + bondedAtomNo);
                                                }
                                          }
                                    }
                              }
                              /*************************************************************/
                              
                              else if ("HELIX ".equalsIgnoreCase(cCol)) {
//                                  HELIX    1 H1A CYS A   11  LYS A   18  1 RESIDUE 18 HAS POSITIVE PHI    1D66  72
//                                            1         2         3         4         5         6         7
//                                  01234567890123456789012345678901234567890123456789012345678901234567890123456789
                                    PDBStructure structure = new PDBStructure();
                                    structure.setStructureType(PDBStructure.HELIX);
                                    structure.setStartChainID(cRead.charAt(19));
                                    structure.setStartSequenceNumber(Integer.parseInt(cRead.substring(21, 25).trim()));
                                    structure.setStartInsertionCode(cRead.charAt(25));
                                    structure.setEndChainID(cRead.charAt(31));
                                    structure.setEndSequenceNumber(Integer.parseInt(cRead.substring(33, 37).trim()));
                                    structure.setEndInsertionCode(cRead.charAt(37));
                                    oBP.addStructure(structure);
                              } else if ("SHEET ".equalsIgnoreCase(cCol)) {
                                    PDBStructure structure = new PDBStructure();
                                    structure.setStructureType(PDBStructure.SHEET);
                                    structure.setStartChainID(cRead.charAt(21));
                                    structure.setStartSequenceNumber(Integer.parseInt(cRead.substring(22, 26).trim()));
                                    structure.setStartInsertionCode(cRead.charAt(26));
                                    structure.setEndChainID(cRead.charAt(32));
                                    structure.setEndSequenceNumber(Integer.parseInt(cRead.substring(33, 37).trim()));
                                    structure.setEndInsertionCode(cRead.charAt(37));
                                    oBP.addStructure(structure);
                              } else if ("TURN  ".equalsIgnoreCase(cCol)) {
                                    PDBStructure structure = new PDBStructure();
                                    structure.setStructureType(PDBStructure.TURN);
                                    structure.setStartChainID(cRead.charAt(19));
                                    structure.setStartSequenceNumber(Integer.parseInt(cRead.substring(20, 24).trim()));
                                    structure.setStartInsertionCode(cRead.charAt(24));
                                    structure.setEndChainID(cRead.charAt(30));
                                    structure.setEndSequenceNumber(Integer.parseInt(cRead.substring(31, 35).trim()));
                                    structure.setEndInsertionCode(cRead.charAt(35));
                                    oBP.addStructure(structure);
                              } // ignore all other commands
                        }
                  // NOTE(review): Reader.ready() returning false does not by itself mean end of
                  // stream - confirm this condition cannot stop early on slow inputs.
                  } while (_oInput.ready() && (cRead != null));
            } catch (Exception e) {
                  // any failure ends reading; the offending line is logged above a column ruler
                  logger.error("Found a problem at line:\n");
                  logger.error(cRead);
                  logger.error("01234567890123456789012345678901234567890123456789012345678901234567890123456789");
                  logger.error("          1         2         3         4         5         6         7         ");
                  logger.error("  error: " + e.getMessage());
                  logger.debug(e);
            }
            
            // try to close the Input
            try {
                  _oInput.close();
            } catch (Exception e) {
                  logger.debug(e);
            }
            
            // Set all the dependencies
            // NOTE(review): structures are only added to oSet on END or MODEL records, so input
            // without a trailing END leaves the last structure out - confirm this is intended.
            oModel.setMoleculeSet(oSet);
            oSeq.addChemModel(oModel);
            oFile.addChemSequence(oSeq);
            
            return oFile;
      }


Generated by  Doxygen 1.6.0   Back to index