Новосибирский институт органической химии им. Н.Н. Ворожцова СО РАН

Лаборатория изучения механизмов органических реакций

xyz2PES


#!/usr/bin/perl -ws

use Data::Dump;
our ($h,$help);
if ($h || $help) {
  (my $program = $0) =~ s/^.*[\/\\]//;
  print "
Create PES diagram autumatically
Usage: $program [options] *.xyz
Dependencies: perl, dot from graphviz

Options:
-name=PES  name for name.dot and name.svg files
-separator='-+'  Separator for TS's name. A-B and A--B are matched
-edges=filename  Если эта опуия присутствует, то взаимосвязи между 
   стационарными точками берутся из filename. A-TS-B
   -edges без аргументы означает -edges=edges
-add_edge='node1,node2'  дополнительные связи (сверх того, что добавляются
   штатно, через имена файлов или через -edges=filename). Например, чтобы 
   связать два ПС. Ноды разделяются запятыми, пары нод - пробелами.
   Просто -add_edge будет искать файл с именем add_edge, в котором прописаны
   дополнительные связи (построчно или через пробел).
-null=name  Zero level 
-delete_null  to exclude null-structure from PES
-kcal  if energy in xyz-files is in kcal/mol
-kJ    if energy in xyz-files is in kJ/mol
-dir=directory  Если *.xyz или *.xyzppm расположены в директории directory
       (относительно соответствующего index.html), то эта опция добавит путь
       к xyz-файлам в name.svg. Default -dir=.
       Если -dir задана (и не '.'), нужно просто переместить сгенерированный
       index.html в ту папку, в которой находится сама задаваемая директория.
-ZPE    К энергиям прибавляются ZPE-поправки
        Во 2-й строке xyz-файлов д.б. подстрока типа 'ZPE 0.095000' (a.e.)
-G      Аналогично, термические поправки 
        Во 2-й строке xyz-файлов д.б. 'G(298.25) 33.98' (kcal/mol)
        Д.б. только одна из -G и -ZPE (т.к. в терм. поправки ZPE уже входит)
-Edisp  Дисперсионные поправки Grimme. См. скрипт Edisp.
        Во 2-й строке xyz-файлов д.б. подстрока типа 'Edisp -9.79' (kcal/mol)
-levels=N  Default is number of structures but > 10
-fontsize=16
-shape=ellipse|box|plain  oval=ellipse, polygon=box
-fill_min=lightgrey  fillcolor for minima
-fill_TS=lightpink   fillcolor for transition states
-study=dot|neato|twopi|fdp|sfdp  For a preliminary assessment of the topology of 
       complex PES, which should be divided into simpler blocks (non-energy). 
       -study without argument equal -study=dot
-all_levels  Default behavior is do not draw empty level if previous and next 
    levels are also empty (for more compact diagram). This option turns off it.

-charges  Additional column of xyz file contains charges (not chemical shifts).

-I  Don't rewrite index.html
    
    Try $program -help for additional information.\n";
  if ($help) {
    print "
Convention about xyz-files:
1. Each xyz-file must contain energy in the 2-nd line
Energy in the first substring matching float number (e.g. Energy -233.46145057).
Energy units is hartree by default.
2. PES if specified by names of xyz-files (without .xyz extension).
For example, the simplest PES: A.xyz, B.xyz, A-B.xyz
where A-B is TS between A and B, '-' is default separator.
gv-file will contain edges \"A\" -- \"A-B\"; \"A-B\" -- \"B\";

Energy is recalculated in kcal. For the zero level, either the minimum energy 
or the energy of the stationary point specified by the -null option is taken.
Graphviz can not fix y-position of nodes (stationary points) according 
to energy precisely. Therefore, nodes with similar energies will be located 
on the same level. После генерирования svg-файла последний автоматически 
редактируется, и каждая нода немного сдвигается, чтобы ее положение 
соответствовало точной энергии. Это может приводить к небольшому перекрыванию
нод и пересечению связей между ними, тем менее вероятному, чем больше -levels. 

Обычно получается не точно запрошенное опцией -levels количество уровней, 
так как тики на энергетической оси выбираются такими, чтобы их энергии имели 
последнюю значащую цифру 1, 2 или 5.

В режиме -study можно попробовать различные движки, реализованные в graphviz.
Довольно наглядное изображение графа дает -study=neato. В режиме -study=twopi 
на консоль печатаются отдельные, не связанные друг с другом куски графа.
";
  }
  exit;
}

# Option's variables
our($null,$delete_null,$kcal,$kJ,$levels,$fontsize,$separator,$study,$all_levels,
    $name,$dir,$ZPE,$G,$Edisp,$fill_min,$fill_TS,$I,$add_edge);

# Defaults
$dir ||= '.';
$name ||= 'PES';
$separator ||= qr/-+/; # A-B and A--B are matched
#$levels ||= 50;
$fontsize ||= 16;
$shape ||= 'ellipse';
$fill_min ||= 'lightgrey';
$fill_TS ||= 'lightpink';

my $overlap = 'true' if !$overlap || $overlap eq '1';

# Defauls for js-script (relatively DocumentRoot of web-server)
my $popJSmol_js = '/js2/popJSmol2.js';
# Edit JSmol directory in popJSmol.js if it is not '/jsmol'

$shape = 'ellipse' if $shape eq 'oval';
$shape = 'polygon' if $shape eq 'box';

if ($edges && $edges eq '1') {
  $edges = 'edges';
}

my $mult = 627.5095;
$mult = 1 if $kcal;
$mult = 1/4.184 if $kJ;
my $num = qr/-?\d+(?:\.\d+)?/;
my (@mols, @statpt);
foreach my $file (@ARGV) {
  next if $file =~ /\.IRC\.xyz$/;
  (my $name = $file) =~ s/\.xyz(ppm)?$//;
  my ($mol) = read_molden($file);
  $mol->[0]{Name} = $name;
  $mol->[0]{File} = $file;
  if ($Edisp) {
    die "No Edisp in $file\n" unless exists $mol->[0]{Edisp};
    $mol->[0]{Energy} += $mol->[0]{Edisp}/$mult;
  }
  if ($G) {
    die "No G in $file\n" unless exists $mol->[0]{G};
    #print "$mol->[0]{G}[0][1]\n";;
    $mol->[0]{Energy} += $mol->[0]{G}[0][1]/$mult;
  }
  push @mols, $mol;
  push @statpt, [$name,$mol->[0]{Energy}];
}
$levels ||= @statpt<10 ? 10 : @statpt;
@statpt = sort {$a->[1]<=>$b->[1]} @statpt;
#my @statpt = statpt_xyz(@ARGV);
#warn "@$_\n" for @statpt;
my %statpt = map {$_->[0] => $_->[1]} @statpt;
$null = $statpt[0][0] unless defined $null;
die "No $null.xyz for -null\n" unless exists $statpt{$null};
my $en0 = $statpt{$null};
delete $statpt{$null} if $delete_null;
foreach my $key (keys %statpt) {
  $statpt{$key} = ($statpt{$key}-$en0)*$mult;
  #printf STDERR "%-10s %8.2f\n", $key, $statpt{$key};
}
#dd %statpt;
my $min_E = ($statpt[0][1]-$en0)*$mult;
my $max_E = ($statpt[-1][1]-$en0)*$mult;
#printf STDERR "%8.2f%8.2f\n", $min_E,$max_E;
my ($step,$FFF,@tics) = tics($min_E,$max_E,$levels);
#warn "@tics\n";

foreach my $tic (@tics) {
  my @ar;
  foreach my $node (keys %statpt) {
    #print sprintf("%.0f",$statpt{$node}/$step)*$step, "   ",$tic, "\n";
    if (sprintf("%.0f",$statpt{$node}/$step)*$step == $tic) {
      push @ar, $node;
    }
  }
  $tic = [$tic,[@ar]];
  #warn "$tic->[0], @{$tic->[1]}\n";
}
#dd @tics;
if (! $all_levels) {
  my @tics_tmp;
  for (my $i=0; $i<@tics; $i++) {
    if ( $i>0 && $i<$#tics && 
         !@{$tics[$i][1]} && !@{$tics[$i-1][1]} && !@{$tics[$i+1][1]}) {
      next;
    }
    push @tics_tmp, $tics[$i];
  }
  @tics = @tics_tmp;
}

sub edges_from_file {
  my ($sep,$file) = @_;
  my @edg;
  open L, '<', $file or die "Can't open $file: $!\n";
  while (<L>) {
    chomp;
    my $prop = $1 if s/\s*\[(.+?)\]\s*//;
    my ($s) = m/($sep)/;
    1 while s/$sep(\S+?)$sep/$s$1 | $1$s/;
    push @edg, map {[$_,$prop]} split(' \| ', $_);
  }
  close L;
  #dd @edg; exit;
  return @edg;
}

my %nodes = %statpt;
my @edges;
if ($edges) {  # if edges from file
  my %seen;
  foreach my $edge (edges_from_file($separator,$edges)) {
    my ($edg,$prop) = @$edge;
    my ($m1,$m2) = sort split $separator, $edg;
    next if exists $seen{"($m1,$m2)"};
    die "$edg in $edges file: no $m1\n" if !exists($statpt{$m1});
    die "$edg in $edges file: no $m2\n" if !exists($statpt{$m2});
    $seen{"($m1,$m2)"} = 1;
    $prop = $prop ? ", $prop" : '';
    push @edges, "\"$m1\" -- \"$m2\" [comment=\"Reaction $m1 into $m2\"$prop]\n";
  }
}
else { # edges from xyz names
  foreach my $ts (keys %nodes) {
    my ($m1,$m2) = split $separator, $ts, 2;
    if ($m1 && $m2) {
      push @edges, "\"$m1\" -- \"$ts\" [comment=\"Reaction $m1 into $ts\"]\n" if exists $nodes{$m1};
      push @edges, "\"$ts\" -- \"$m2\" [comment=\"Reaction $ts into $m2\"]\n" if exists $nodes{$m2};
    }
  }
}
if ($add_edge) {
  if ($add_edge eq '1') {
    open L, '<', "add_edge" or die "Can't open file add_edge: $!\n";
    $add_edge = join "\n", <L>; 
    close L;
  }
  $add_edge =~ s/\s+/ /g;
  1 while $add_edge =~ s/,([^, ]+),/,$1 $1,/;
  my @pairs = split ' ', $add_edge;
  foreach (@pairs) {
    my ($m1,$m2) = split ',', $_;
    push @edges, "\"$m1\" -- \"$m2\" [comment=\"Reaction $m1 into $m2\"]\n";
  }
}

#print for @edges;

open GV, '>', "$name.dot" or die "Can't write to $name.dot: $!\n";
print GV "graph {\n";
print GV "ranksep=\"0.0 equally\"; size=\"18,8\";\n";
#print GV "ranksep=\"0.0 equally\"; overlap=$overlap; size=\"18,8\";\n";
if (! $study) {
  print GV "{\n";
  print GV "node [shape=plain, fontsize=$fontsize, margin=\"0.0,0.0\"];\n";
  print GV "edge [weight=1000, color=white];\n";
  print GV "\"000 kcal\" --";
  print GV join(' -- ', map {"\"$_->[0] kcal\""} reverse @tics);
  if ($tics[0][0] > $min_E) {
    print GV ' -- "xxx"';
  }
  print GV ";\n}\n";
}
print GV "node [shape=$shape, fontsize=$fontsize, margin=\"0.0,0.0\"];\n";
print GV "edge [weight=1000];\n";
if (! $study) {
  foreach my $tic (@tics) {
    print GV "{ rank = same; \"$tic->[0] kcal\"; ";
    print GV map {"\"$_\"; "} @{$tic->[1]};
    print GV "}\n";
  }
}
print GV "// Nodes\n";
foreach my $v (sort {$nodes{$a}<=>$nodes{$b}} keys %nodes) {
  my $en = $nodes{$v};
  my $ts_flag;
  if ($edges) {
    $ts_flag = 1 if $v=~/^ts/i;
  }
  else {
    $ts_flag = 1 if $v=~$separator;
  }
  my $en_tic = sprintf "%.${FFF}f", sprintf("%.0f",$nodes{$v}/$step)*$step;
  print GV "\"$v\" [comment=\"Energy $en  Tic $en_tic\", color=transparent, style=filled, ";
  print GV 'fillcolor=', $ts_flag ? $fill_TS : $fill_min;
  print GV "]\n";
}
print GV "// Edges\n";
print GV @edges;
print GV "}\n";
close GV;


my @args = ("dot", "-T", "svg", "-Lg", "$name.dot");
#my @args = ("dot", "-T", "svg", "$name.dot");

if ($study) {
  $study = 'neato' if $study eq '1';
  @args = ("dot", "-K", "$study", "-T", "svg", "$name.dot");
}

#if ($study) {
#  $study = 'dot' if $study eq '1';
##  my @args = ("dot", "-K", "$study", "-T", "svg", "-o", "$name.svg", "-Lg", "$name.dot");
#  my @args = ("dot", "-K", "$study", "-T", "svg", "-o", "$name.svg", "$name.dot");
#  #warn "`@args`\n";
#  system(@args) == 0 or die "system @args failed: $?";
#  exit;
#}

#warn "@args\n";
open DOT, '-|', @args or die "Can't run @args : $!\n";
my $svg;
{
  local $/;
  $svg = <DOT>;
}
close DOT;

$svg =~ s/&#45;/-/g;
$svg =~ s/&#160;/ /g;
print_clusterization("$name.svg") if $study and $study eq 'twopi';

my ($svg_top,@svg_tics,@svg_nodes,@svg_edges);

if ($svg =~ /^.*?(<svg .*?>\s*<g .*?>)/s) {
  $svg_top = $1;
  $svg_top =~ s/<!--.*?-->\s*//sg;
  $svg_top =~ s/(width=")($num)/$1 . ($2+$fontsize)/e;
  #$svg_top =~ s/viewBox="[^"]+"\s*//;
}

if (! $study) {
while ($svg =~ /<!--\s*($num) kcal\s*-->\s*(<g.*?<\/g>)/gs) {
  my ($tic,$s) = ($1,$2);
  $s = $1 if $s=~/(<text.*?<\/text>)/;
  $s =~ s/ kcal//;
  $s =~ s/text-anchor="middle"/text-anchor="end"/ if $tic ne '000';
  push @svg_tics, [$tic,$s];
}
#dd @svg_tics;
}
while ($svg =~ /<!--\s*Energy ($num)\s+Tic ($num)\s*-->\s*(<g.*?<\/g>)/gs) {
  my ($en,$tic,$s) = ($1,$2,$3);
  my $name = $1 if $s=~s/<title>(.*?)<\/title>\n//;
  push @svg_nodes, [$name,$en,$tic,$s];
  #warn "$s\n";
}
while ($svg =~ /<!--\s*Reaction (.+?) into (.+?) -->\s*(<g.*?<\/g>)/gs) {
  my ($from,$to,$s) = ($1,$2,$3);
  $s = $1 if $s=~/(<path.*?\/>)/;
  push @svg_edges, [$from,$to,$s];
}

my ($tic0,$tic1,$step_pt);
if (! $study) {
  $tic0 = $1 if $svg_tics[0][1]=~/ y="($num)"/;
  $tic1 = $1 if $svg_tics[1][1]=~/ y="($num)"/;
  $step_pt = ($tic1 - $tic0)/$step;
  #warn "$step_pt = ($tic1 - $tic0)/$step\n";
}

my @SVG;
#$svg_top =~ s/(<svg.*?height=")($num)/$1 . ($2+$step_pt)/e;
push @SVG, "$svg_top\n";

push(@SVG, tics_scale(\@svg_tics, $fontsize, $step)) if ! $study;

foreach my $node (@svg_nodes) {
  #warn "$node->[0]: $node->[2]=>$node->[1]\n";
  my $adjust;
  if (! $study) {
    $adjust = ($node->[1] - $node->[2])*$step_pt;
    #warn "$adjust = ($node->[1] - $node->[2])*$step_pt\n";
    #$adjust = 0;
    if ($shape eq 'ellipse') {
      $node->[3] =~ s/(<ellipse.*? cy=")($num)/$1 . ($2-$adjust)/se;
    }
    elsif ($shape eq 'polygon' || $shape eq 'plain') {
      my $points = $1 if $node->[3] =~ /<polygon.*? points="([^"]+)"/;
      $points = join ' ',
        map {$_->[1] -= $adjust; join ',', @$_} 
          map {[split ',']} 
            split ' ', $points;
      $node->[3] =~ s/(<polygon.*? points=")[^"]+/$1$points/;
    }
    else {
      warn "Unknown shape $shape: no adjust\n";
    }
    $node->[3] =~ s/(<text.*? y=")($num)/$1 . ($2-$adjust)/se;
  }
  my ($Title,$File);
  my @PPM = ();
  foreach my $mol (@mols) {
    my $name = $mol->[0]{Name};
    if ($name eq $node->[0]) {
      $Title = $mol->[0]{Title};
      $File = $mol->[0]{File};
      push @PPM, 'molden_Freq' if -e "$name.freq";
      if (-e "$name.IRC.xyz") {
        my $irc = 'IRC';
        my ($dipole,$grad) = (1,1);
        my @mols = read_molden("$name.IRC.xyz");
        $dipole = 0 if grep {! $_->[0]{Dipole}} @mols;
        $grad = 0 if grep {! $_->[0]{Grad}} @mols;
        $irc .= '+Dipole' if $dipole;
        $irc .= '+Gradient' if $grad;
        push @PPM, $irc;
      }
      push @PPM, 'kcal' if $kcal;
      push @PPM, 'kJ' if $kJ;
      #dd $mol;
      my $max_length = max(map {scalar @{$mol->[$_]}} 1..$#{$mol});
      #warn "$name\t$max_length\n";
      if ($max_length == 5) {
        push @PPM, $charges ? 'xyzcharges' : 'xyzppm';
      }
      elsif ($max_length == 7) {
        push @PPM, 'xyzfreq';
      }
      elsif ($max_length == 8) {
        push @PPM, 'xyzfreq', $charges ? 'xyzcharges' : 'xyzppm';
      }
      if (-e "$name.xyzppm") {
        my ($molppm) = read_molden("$name.xyzppm");
        #dd $molppm;
        for (my $i=1; $i<@$mol; $i++) {
          if ($max_length == 4) {
            $mol->[$i][4] = $molppm->[$i][4];
            push @PPM, 'xyzppm';
          }
          elsif ($max_length == 7) {
            $mol->[$i][7] = $molppm->[$i][4];
            push @PPM, 'xyzppm';
          }
        }
        write_molden($mol,$File);
      }
      last;
    }
  }
  #my $mol = grep {$_->[0]{Name} eq $node->[0]} @mols;
  #print "'$node->[0]'  '$mol->[0]{Name}'\n";
  
  #my $xxx = "style=\"cursor: pointer\" onClick=\"popJSmol('$dir/$File', '$Title', '@PPM')\"";
#  my $PPM = "@PPM";
#  $PPM =~ s/molden_Freq IRC/TS/;
#  $PPM =~ s/^\s+//; $PPM =~ s/\s+$//;
  my $xxx = "onMouseOver=\"this.style.cursor='pointer'\" onClick=\"popJSmol('$dir/$File', '$Title', '@PPM')\"";
  #warn "$File '@PPM'\n";
  my $ttt = "<title>".(sprintf "%.2f", $node->[1])." kcal<\/title>"; 
  $node->[3] =~ s/(<$shape .+?)\/>/$1 $xxx>\n$ttt\n<\/$shape>/;
  $node->[3] =~ s/(<text .+?)>/$1 $xxx>\n/;
  $node->[3] =~ s/(<\/text>)/\n$ttt\n$1/;
  $node->[3] =~ s/$/\n/;
  push @SVG, "$node->[3]\n";
  
  if (! $study) {
    foreach my $edge (@svg_edges) {
      if ($edge->[0] eq $node->[0]) {
        $edge->[2] =~ s/(M\s*$num,)($num)/$1 . ($2-$adjust)/e;
      }
      elsif ($edge->[1] eq $node->[0]) {
        $edge->[2] =~ s/(C.+,)($num)/$1 . ($2-$adjust)/e;
      }
    }
  }
}
foreach (@svg_edges) {
  #warn "from $_->[0] to $_->[1]\n";
  push @SVG, "$_->[2]\n";
}
push @SVG, "</g>\n</svg>\n";

#if ($dir ne '.') {
#  my @SVG_local = map {s/$dir\\//g} @SVG;
#  
#}

open SVG, '>', "$name.svg" or die "Can't write to $name.svg: $!\n";
print SVG @SVG;
close SVG;

#  make_index_html('Topology of PES');
if (! -e 'index.html' or ! $I) {
  $study ? make_index_html('Topology of PES') : make_index_html();
}


sub make_index_html {
  my $title = 'PES';
  my $driver = '';
  if (@_) {
    $title = $_[0];
    $driver = " ($study)";
  }
  
  my $htm = 'index.html';
  open HTM, '>', $htm or die "Can't write $htm: $!\n";
  #warn "Create $htm\n\n";
  print HTM <<HTM;
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
<html>

<head>
  <meta http-equiv="content-type" content="text/html; charset=utf-8">
  <title>$title</title>
  <script TYPE="text/javascript" src="/jsmol/jquery/jquery.min.js"> </script>
  <script TYPE="text/javascript" src="$popJSmol_js"> </script>
</head>

<body style="background-color:white;">
<h1>$title$driver</h1>
HTM
if (! @_) {
print HTM <<HTM;
<small>
  Click on $shape to view 3D structure in popup window (run JSmol)<br>
</small>
<br>
HTM
}
print HTM <<HTM;
<!--#include virtual="$dir/$name.svg" -->
<br>
</body>

</html>
HTM
  close HTM;
}

sub tics {
  my ($min_E,$max_E,$levels) = @_;
  #warn "$min_E,  $max_E,  $levels\n";
  my @tics;
  my $step = ($max_E-$min_E)/$levels;
  sprintf("%e",$step) =~ /^(\d).*e(.*)/;
  my $FFF = $2<0 ? abs($2) : 0;
  #warn "$step $FFF\n";
  if    ($1 >= 5) { $step = 10*10**$2 }
  elsif ($1 >= 2) { $step =  5*10**$2 }
  else            { $step =  2*10**$2 }
  sprintf("%e",$step) =~ /^(\d).*e(.*)/;
  $FFF = $2<0 ? abs($2) : 0;
  #warn "$step $FFF\n";
  #print "$step\n";
  my $tic = sprintf("%.0f",$min_E/$step)*$step;
  my $TIC = sprintf("%.0f",$max_E/$step)*$step;
  while ($tic <= $TIC) {
    $tic = 0 if abs($tic)<1e-10;
    push @tics, sprintf("%.${FFF}f",$tic);
    $tic += $step;
  }
  if ($max_E-$tics[-1] >= $step/2) {
    push @tics, sprintf("%.${FFF}f",$tics[-1]+$step)
  }
  if ($tics[0]-$min_E >= $step/2) {
    unshift @tics, sprintf("%.${FFF}f",$tics[0]-$step)
  }
  #warn "$min_E,  $max_E,  $step\n@tics\n";
  return $step, $FFF, @tics;
}

sub statpt_xyz {
  
  my @statpt;
  foreach my $file (@_) {
    (my $name = $file) =~ s/\.xyz$//;
    open L, '<', $file or die "Can't open $file: $!\n";
    <L>;
    <L> =~ /(-?\d+(?:\.\d+)?)/ && push(@statpt, [$name,$1]);
    close L;
  }
  return sort {$a->[1]<=>$b->[1]} @statpt;
}

sub read_molden {
 ############################################################################
 ## Молекула -- ссылка на массив, 0-й элемент -- свойства, следующие - атомы.
 ## Свойства -- ссылка на хэш с ключами Energy, Symmetry
 ## Атом -- ссылка на массив [atom, x, y, z, ppm] (ppm может не быть).
 ##
 ## Читает xyz. Параметры - имена xyz-файлов. Если параметров нет, то <>.
 ## Возвращает массив найденных молекул.
 ############################################################################
  local @ARGV = @_ ? @_ : @ARGV;
  my $num = qr/-?\d+(?:\.\d+)?/;
  my @mols;
  my $line;
  LOOP:
  while ($line || defined($line = <>)) {
    #print $line;
    if ($line =~/^\s*(\d+)\s*$/) {
      my @mol;
      my $N = $1;
      last LOOP if eof();
      next LOOP if eof(ARGV);
      $line = <>;
      chomp $line;
      $mol[0]{Title} = $line;
      ($mol[0]{Energy}) = $line =~ /(?:\s|^|=)($num)(?:\s|$)/;
      ($mol[0]{Symmetry}) = $line =~ /symm\S*\s+(\S+)/i;
      ($mol[0]{Charge}) = $line =~ /Charge\s+(-?\d+)/;
      ($mol[0]{Mult}) = $line =~ /Mult\s+(\d+)/;
      ($mol[0]{HoF}) = $line =~ /HoF\s+($num)/o;
      ($mol[0]{Edisp}) = $line =~ /Edisp\s+($num)/o;
      ($mol[0]{ZPE}) = $line =~ /ZPE\s+($num)/o;
      ($mol[0]{Dipole}) = $line =~ /Dipole\s+($num)/o;
      ($mol[0]{Grad}) = $line =~ /Grad\s+($num)/o;
      while ($line =~ /G\(($num)\)\s+($num)/g) {
        push @{$mol[0]{G}}, [$1,$2];
      }
      for (my $i=1; $i<=$N; $i++) {
        last LOOP if eof();
        next LOOP if eof(ARGV);
        $line = <>;
        #print $line;
        if ($line =~ /^\s*([A-Z]{1,2})\s+($num)\s+($num)\s+($num)\s*(.*)/io) {
          $mol[$i] = [$1,$2,$3,$4,$5];
          pop @{$mol[$i]} if $mol[$i][4] eq '';
        } else {
          next LOOP;
        }
      }
      push @mols, \@mol;
      last LOOP if eof();
    } else {
      undef $line;
    }
  }
  return @mols;
}

sub write_molden {
  my $oldfh;
  
  if (@_ > 1 && !ref($_[-1])) {
    my $file = pop @_;
    open F, '>', $file or do {warn "Can't write to $file: $!\n"; return};
    $oldfh = select F;
  }
  
  foreach my $mol (@_) {
    #pp $mol;
    my $N = $#{$mol};
    print " $N\n";
	  print $mol->[0]{Title} || '', "\n";
	  for (my $i=1; $i<=$N; $i++) {
      printf " %-2s %12.8f %12.8f %12.8f", @{$mol->[$i]};
      print "    $mol->[$i][4]" if $mol->[$i][4];
      print "\n";
	  }
  }
  
  if ($oldfh) {
    close F;
    select $oldfh;
  }
}

sub min {
  my $min = 1e10;
  foreach my $val (@_) {
    $min = $val if $min > $val;
  }
  return $min;
}
sub max {
  my $max = -1e10;
  foreach my $val (@_) {
    $max = $val if $max < $val;
  }
  return $max;
}

sub tics_scale {
  my @svg_tics = @{$_[0]};
  my $fontsize = $_[1];
  my $step = $_[2];
  my @SVG;
  
  my $razr = '
<g transform="translate(0 0) scale(1)">
  <path d="M 0 -2 V 2" stroke-width="6" stroke="white" />
  <text text-anchor="middle" font-family="Times,serif" font-size="16" x="0" y="2.05">~</text>
  <text text-anchor="middle" font-family="Times,serif" font-size="16" x="0" y="6.05">~</text>
</g>';

  my $min_x = min(map {$_->[1]=~/ x="($num)"/} @svg_tics);
  foreach (@svg_tics) {$_->[1] =~ s/ x="($num)"/ x="$min_x"/};
  #@svg_tics = map {$->[1] =~ s/ x="($num)"/ x="$min_x"/; $_} @svg_tics;
  #print "$min_x\n";
  
  my $svg_tic0 = shift @svg_tics;
  
  my ($min_y) = $svg_tics[0][1]=~/ y="($num)"/;
  my ($max_y) = $svg_tics[-1][1]=~/ y="($num)"/;
  #my $delta = ($max_y-$min_y)/$#svg_tics;
  
  (my $s0 = $svg_tic0->[1])=~ s/>000</>kcal\/mol</;
  $s0 =~ s/ x="($num)"/ x="$min_x"/;
  $s0 =~ s/( y=")$num/$1 . ($min_y-$fontsize*1.5)/e;
  push @SVG, "$s0\n";
  
  my $axis_x = $min_x - $fontsize/16;
  my $axis_y1 = $min_y-$fontsize/2;
  my $axis_y2 = $max_y;
  my $axis_width = $fontsize/16;
  push @SVG, ("<path d=\"M $axis_x $axis_y1 L $axis_x $axis_y2\" stroke-width=\"$axis_width\" stroke=\"black\" />\n");
  
  for (my $i=0; $i<@svg_tics; $i++) {
    if ($i>0 && $i<$#svg_tics && 
        $svg_tics[$i][0] ne '~' && 
        ($svg_tics[$i][0]-$svg_tics[$i+1][0]>$step*1.01)
       ) {
      $svg_tics[$i+1][0] = $svg_tics[$i][0] = '~';
      my $y1 = $1 if $svg_tics[$i][1]=~/ y="($num)"/;
      my $y2 = $1 if $svg_tics[$i+1][1]=~/ y="($num)"/;
      my $y12 = ($y1+$y2)/2-$fontsize/4;
      (my $s = $razr) =~ s/translate\(0 0\)/translate($axis_x $y12)/;
      $s =~ s/(scale\()(1)/$1 . ($2*$fontsize\/16)/e;
      push @SVG, "$s\n";
    }
    #warn "$svg_tics[$i][0]\n";
    $svg_tics[$i][1]=~s/>($num)</>$1 -</;
    foreach (@svg_tics) {$_->[1] =~ s/ x="($num)"/ x="$min_x"/};
    push @SVG, "$svg_tics[$i][1]\n";
  }
  
  unshift @SVG, "<g>\n";
  push @SVG, "</g>\n";
  #dd @SVG;
  return @SVG;
}
sub print_clusterization {
  my $svg = shift;
  my %H;
  open L, '<', $svg or die "Can't open $svg: $!\n";
  while (<L>) {
    next unless m/^\s*<text /;
    s/&#45;/-/g;
    s/&#160;/ /g;
    my ($x) = m/ x="($num)"/;
    my ($y) = m/ y="($num)"/;
    my ($text) = m/>(.+)<\/text>/;
    push @{$H{"$x,$y"}}, $text;
  }
  close L;
  foreach (sort {@$b<=>@$a} values %H) {
    print join(',', @$_), "\n";
  }
}