Новосибирский институт органической химии им. Н.Н. Ворожцова СО РАН

Лаборатория изучения механизмов органических реакций

txyz2xyz


#!/usr/bin/perl -ws

# Command-line switches. They are package globals because perl's -s option
# (see the shebang line) stores each "-name" switch in $main::name:
#   -h, -help  print the usage text and exit
#   -i         write the result over the input file
our ($h,$help,$i);

# Print usage when help was requested or no input files were given.
if ($h || $help || !@ARGV) {
  # Program name without its directory part (handles both / and \ separators).
  (my $prog = $0) =~ s/.*[\/\\]//;
  print "
  Transform Tinker MMFF94 xyz (txyz) into XMol xyz.
  
  Usage: $prog [-i] file1.txyz file2.txyz ...
  
  If the file has the extension txyz, it is replaced with xyz; 
  otherwise, '.xyz' is appended to the filename.
  
  -i  File is edited in-place.
  \n";
  exit;
}


# Convert every file named on the command line.
foreach my $file (@ARGV) {
  # Output name: "foo.txyz" -> "foo.xyz", anything else -> "<file>.xyz".
  (my $name = $file) =~ s/\.txyz$//;
  # txyz2xyz returns nothing on failure (it has already warned). Skip the
  # file then: calling write_molden anyway would truncate the target, and
  # with -i the target is the original input file.
  my $mol = txyz2xyz($file) or next;
  write_molden($mol, $i ? $file : "$name.xyz");
}

# txyz2xyz($file)
#
# Reads a Tinker MMFF94 xyz file and returns a reference to an array whose
# element 0 is a hash holding the first line of the file verbatim under the
# key 'Title', and whose remaining elements are [element, x, y, z] arrays,
# one per atom line. Whitespace-separated fields 2..5 of each atom line are
# taken as the MMFF atom name and the three coordinates.
#
# The element is derived from the MMFF atom name: leading characters that
# are not upper-case letters are dropped, then trailing characters are
# removed until a known element symbol remains.
#
# Returns nothing (after a warning) when the file cannot be opened or is
# empty, when an atom line has fewer than five fields, or when an atom name
# cannot be reduced to a known element. Blank lines are ignored.
sub txyz2xyz {
# Reference table (documentation only, not used by the code): MMFF94 atom
# type symbols and the element each one stands for.
#  my %mmff_atoms = (
#    'CR'=>'C',     'HC'=>'H',     'OR'=>'O',     'NR'=>'N',     'S'=>'S',
#    'C=C'=>'C',    'HSI'=>'H',    'OC=O'=>'O',   'N=C'=>'N',    'S=C'=>'S',
#    'CSP2'=>'C',   'HOR'=>'H',    'OC=C'=>'O',   'N=N'=>'N',    'S=O'=>'S',
#    'C=O'=>'C',    'HO'=>'H',     'OC=N'=>'O',   'NC=O'=>'N',   '>S=N'=>'S',
#    'C=N'=>'C',    'HOM'=>'H',    'OC=S'=>'O',   'NC=S'=>'N',   'SO2'=>'S',
#    'CGD'=>'C',    'HNR'=>'H',    'ONO2'=>'O',   'NN=C'=>'N',   'SO2N'=>'S',
#    'C=OR'=>'C',   'H3N'=>'H',    'ON=O'=>'O',   'NN=N'=>'N',   'SO3'=>'S',
#    'C=ON'=>'C',   'HPYL'=>'H',   'OSO3'=>'O',   'NR+'=>'N',    'SO4'=>'S',
#    'CONN'=>'C',   'HNOX'=>'H',   'OSO2'=>'O',   'NPYD'=>'N',   '=SO2'=>'S',
#    'COO'=>'C',    'HNM'=>'H',    'OSO'=>'O',    'NPYL'=>'N',   'SNO'=>'S',
#    'COON'=>'C',   'HN'=>'H',     'OS=O'=>'O',   'NC=C'=>'N',   'STHI'=>'S',
#    'COOO'=>'C',   'HOCO'=>'H',   '-OS'=>'O',    'NC=N'=>'N',   'S-P'=>'S',
#    'C=OS'=>'C',   'HOP'=>'H',    'OPO3'=>'O',   'NC=P'=>'N',   'S2CM'=>'S',
#    'C=S'=>'C',    'HN=N'=>'H',   'OPO2'=>'O',   'NC%C'=>'N',   'SM'=>'S',
#    'C=SN'=>'C',   'HN=C'=>'H',   'OPO'=>'O',    'NSP'=>'N',    'SSMO'=>'S',
#    'CSO2'=>'C',   'HNCO'=>'H',   '-OP'=>'O',    'NSO2'=>'N',   'SO2M'=>'S',
#    'CS=O'=>'C',   'HNCS'=>'H',   '-O-'=>'O',    'NSO3'=>'N',   'SSOM'=>'S',
#    'CSS'=>'C',    'HNCC'=>'H',   'O=C'=>'O',    'NPO2'=>'N',   '=S=O'=>'S',
#    'C=P'=>'C',    'HNCN'=>'H',   'O=CN'=>'O',   'NPO3'=>'N',   'PO4'=>'P',
#    'CSP'=>'C',    'HNNC'=>'H',   'O=CR'=>'O',   'NC%N'=>'N',   'PO3'=>'P',
#    '=C='=>'C',    'HNNN'=>'H',   'O=CO'=>'O',   'NO2'=>'N',    'PO2'=>'P',
#    'CR4R'=>'C',   'HNSO'=>'H',   'O=N'=>'O',    'NO3'=>'N',    'PO'=>'P',
#    'CR3R'=>'C',   'HNPO'=>'H',   'O=S'=>'O',    'N=O'=>'N',    'PTET'=>'P',
#    'CE4R'=>'C',   'HNC%'=>'H',   'O=S='=>'O',   'NAZT'=>'N',   'P'=>'P',
#    'CB'=>'C',     'HSP2'=>'H',   'O2CM'=>'O',   'NSO'=>'N',    '-P=C'=>'P',
#    'C%'=>'C',     'HOCC'=>'H',   'OXN'=>'O',    '=N='=>'N',    'FE+2'=>'Fe',
#    'CGD+'=>'C',   'HOCN'=>'H',   'O2N'=>'O',    'N+=C'=>'N',   'FE+3'=>'Fe',
#    'CNN+'=>'C',   'HOH'=>'H',    'O2NO'=>'O',   'N+=N'=>'N',   'LI+'=>'Li',
#    'C5A'=>'C',    'HNR+'=>'H',   'O3N'=>'O',    'NCN+'=>'N',   'NA+'=>'Na',
#    'C5B'=>'C',    'HIM+'=>'H',   'O-S'=>'O',    'NGD+'=>'N',   'K+'=>'K',
#    'CO2M'=>'C',   'HPD+'=>'H',   'O2S'=>'O',    'NPD+'=>'N',   'ZINC'=>'Zn',
#    'CS2M'=>'C',   'HNN+'=>'H',   'O3S'=>'O',    'NR%'=>'N',    'ZN+2'=>'Zn',
#    'C5'=>'C',     'HNC+'=>'H',   'O4S'=>'O',    'NM'=>'N',     'CA+2'=>'Ca',
#    'CIM+'=>'C',   'HGD+'=>'H',   'OSMS'=>'O',   'N5A'=>'N',    'CU+1'=>'Cu',
#                   'HN5+'=>'H',   'OP'=>'O',     'N5B'=>'N',    'CU+2'=>'Cu',
#                   'HOS'=>'H',    'O2P'=>'O',    'N2OX'=>'N',   'MG+2'=>'Mg',
#                   'HS'=>'H',     'O3P'=>'O',    'N3OX'=>'N',   'F'=>'F',
#                   'HS=N'=>'H',   'O4P'=>'O',    'NPOX'=>'N',   'CL'=>'Cl',
#                   'HP'=>'H',     'O4CL'=>'O',   'N5M'=>'N',    'BR'=>'Br',
#                   'HO+'=>'H',    'OM'=>'O',     'N5'=>'N',     'I'=>'I',
#                   'HO=+'=>'H',   'OM2'=>'O',    'NIM+'=>'N',   'F-'=>'F',
#                                  'OH2'=>'O',    'N5A+'=>'N',   'CL-'=>'Cl',
#                                  'OFUR'=>'O',   'N5B+'=>'N',   'BR-'=>'Br',
#                                  'O+'=>'O',     'N5+'=>'N',    'CLO4'=>'Cl',
#                                  'O=+'=>'O',    'N5AX'=>'N',   'SI'=>'Si',
#                                                 'N5BX'=>'N',
#                                                 'N5OX'=>'N',
#                                                 'NPY'=>'N'
#  );

  # Upper-case element symbols an MMFF atom name can be trimmed down to.
  # FE is listed so that FE+2/FE+3 are not trimmed further to F.
  my %mmff_atoms = map {$_=>1} qw(C H O N S P F CL BR I LI NA K ZN CA CU MG SI FE);

  # Atom names from the table above for which trailing-character trimming
  # gives a wrong answer: NAZT would stop at NA (sodium) and ZINC never
  # reaches a known symbol.
  my %mmff_special = ('NAZT'=>'N', 'ZINC'=>'ZN');

  my $file = shift;
  my $mol;
  open my $fh, '<', $file or do {warn "Can't open $file: $!\n"; return};

  # The first line is kept verbatim as the title.
  my $title = <$fh>;
  unless (defined $title) {
    warn "No data in $file\n";
    close $fh;
    return;
  }
  $mol->[0]{'Title'} = $title;

  while (my $line = <$fh>) {
    next unless $line =~ /\S/;          # ignore blank lines
    my ($atom,$x,$y,$z) = (split ' ', $line)[1..4];
    unless (defined $z) {
      warn "Malformed atom line $. in $file\n";
      close $fh;
      return;
    }

    (my $at = $atom) =~ s/^[^A-Z]+//;   # drop leading '=', '-', '>' etc.
    if (exists $mmff_special{$at}) {
      $at = $mmff_special{$at};
    }
    else {
      # Stop when the name is used up; chop on an empty string is a no-op,
      # so looping without the length test would never terminate.
      chop $at while length $at && !exists $mmff_atoms{$at};
    }
    unless (length $at) {
      warn "Undefined MMFF atom $atom\n";
      close $fh;
      return;
    }
    push @$mol, [ucfirst(lc $at),$x,$y,$z];
  }
  close $fh;
  return $mol;
}

# write_molden(@molecules [, $file])
#
# Prints each molecule in XMol xyz format: a line with the atom count, a
# title line, then one " El x y z" line per atom. A molecule is an array
# reference whose element 0 is a hash of properties (the 'Title' key is
# printed as the title line) and whose remaining elements are
# [element, x, y, z] arrays.
#
# When more than one argument is given and the last one is not a reference,
# it is taken as the name of the file to write; otherwise output goes to the
# currently selected filehandle.
#
# Arguments that are not references are skipped. If nothing valid is left,
# the sub warns and returns without touching the output file. Returns 1 on
# success and nothing on failure (cannot open or close the output file,
# nothing to write).
sub write_molden {
  my @mols = @_;
  my ($file, $fh, $oldfh);

  # A trailing plain scalar after at least one molecule is the file name.
  if (@mols > 1 && !ref($mols[-1])) {
    $file = pop @mols;
  }

  # Check the input before opening the file: opening for writing truncates
  # it, which must not happen when there is nothing to put into it.
  @mols = grep { ref } @mols;
  unless (@mols) {
    warn "No molecules to write\n";
    return;
  }

  if (defined $file) {
    open $fh, '>', $file or do {warn "Can't write to $file: $!\n"; return};
    $oldfh = select $fh;
  }

  foreach my $mol (@mols) {
    my $full_title = 1; my $ppm = 0; # Emulation of external parameters
    my $N = $#{$mol};                # element 0 is the property hash
    print " $N\n";
    if ($full_title) {
      # The title must occupy exactly one terminated line of the output.
      my $title = $mol->[0]{Title};
      $title = '' unless defined $title;
      $title .= "\n" unless $title =~ /\n\z/;
      print $title;
    }
    else {
      foreach my $f (qw/Energy Charge Mult Symmetry Dipole ZPE HoF Edisp Ellips sigma/) {
        print "  $f $mol->[0]{$f}" if defined $mol->[0]{$f};
      }
      if ($mol->[0]{G}) {
        foreach my $g (@{$mol->[0]{G}}) {
          print "  G($g->[0]) $g->[1]";
        }
      }
      print "\n";
    }
    foreach my $atom (@{$mol}[1..$N]) {
      # Pass exactly the four formatted fields; an optional fifth field
      # would otherwise be a redundant printf argument.
      printf " %-2s %12.8f %12.8f %12.8f", @{$atom}[0..3];
      print "    $atom->[4]" if $ppm && $atom->[4];
      print "\n";
    }
  }

  if ($fh) {
    select $oldfh;
    # Buffered write errors (e.g. disk full) only show up at close.
    close $fh or do {warn "Error writing $file: $!\n"; return};
  }
  return 1;
}