Новосибирский институт органической химии им. Н.Н. Ворожцова СО РАН

Лаборатория изучения механизмов органических реакций

gmsv


#!/usr/bin/perl -ws

#use Data::Dump::Color('dd');

our ($h,$help,$n,$b,$molden);
if ($h || $help) {
  (my $program = $0) =~ s/^.*[\/\\]//;
  print "Usage: $program [-n=num] file.dat
Print \$VEC group(s) (GAMESS) to stdout.

See xyz -h for format of -n option.
  -n=-1 (last vectors) as default
  -b is synonym of -n=1 (first vectors)
  
Example: $program ~/scr/file0.dat >> file1.inp
  
Also EIGENVECTORS (for start structure) or MOLECULAR ORBITALS (for finish one)
from out-file may be converted to \$VEC group:
         $program file0.out >> file1.inp
And MCSCF NATURAL and OPTIMIZED ORBITALS
  -molden  also write file0.mos (molden format) (so far only from out-files)

"
;  exit;
}

my $f = qr(-?\d+\.\d+);
my $BA = 0.52917721;
$n ||= -1;
$n = 1 if $b;

foreach my $file (@ARGV) {
  (my $name = $file) =~ s/\.(dat|out)$//;
  
  open L, '<', $file or die "Can't open $file: $!\n";
  
  # GAMESS file type
  my $type;
  my $line = <L>;
  if ($line =~ /^ \$DATA\s*$/) {
    $type = 'dat';
  }
  else {
    while (<L>) {
      if (/^\s+\*\s+GAMESS VERSION =/) {
        $type = 'out';
        last;
      }
    }
  }
  warn "\n$file: $type-file\n" if $type;

  my @V; # blocks of vectors as strings
  my @V_matrix; # blocks of vectors as matrixes (for out-files)
  my @V_name;
  my (@mol0,@basis,@mol1);

  if ($type eq 'dat') {
    my $vectors_count = 0; # counter 0f blocks
    while (<L>) {
      if (/^ \$VEC/ .. /^ \$END/) {
        push @V_name, "lines $.-" if /^ \$VEC/;
        $V[$vectors_count] .= $_;
        do {$vectors_count++; $V_name[-1] .= $.} if /^ \$END/;
      }
    }
    #print @V; exit;
  }
  elsif ($type eq 'out') {
    #warn "Don't realize for out-file. Sorry\n";
    #exit;
    my ($symmetry,
        $shells,$basfun,$electrons,$charge,$mult,$alpha,$beta,$N,
        $scftyp,$runtyp,
        %mol0_label);
    while (<L>) {
      $symmetry = $1 if /THE POINT GROUP OF THE MOLECULE IS (\w+)/;
      $symmetry .= "_$1" if (/THE ORDER OF THE PRINCIPAL AXIS IS\s+(\d*)/ && $1 > 1);
      $symmetry .= "_$1" if (/THE ORDER OF THE PRINCIPAL AXIS IS\s+(\d*)/ && $1 > 1);
      if (/ATOM      ATOMIC                      COORDINATES \(BOHR\)/) {
        <L>;
        while (<L>) {
          if (/^\s*(\w+)\s+($f)\s+($f)\s+($f)\s+($f)/) {
            push @mol0, [element(int $2),$3*$BA,$4*$BA,$5*$BA];
            $mol0_label{$1} = element(int $2);
          }
          last if /^\s*$/;
        }
        #dd @mol0; exit;
      }
      if (/ATOMIC BASIS SET/) {
        my $at_count = -1;
        while (<L>) {
          chomp;
          if (/^\s*(\w+)\s*$/) {
            $at_count++;
            $basis[$at_count][0] = $mol0_label{$1};
            next;
          }
          if (/^\s+\d+\s+/) {
            my ($shell,$type,$primitive,$exponent,$contractions) = split ' ', $_, 5;
            $contractions = [split ' ', $contractions];
            #$exponent = int $exponent; @$contractions = map {int} @$contractions;
            if ($basis[$at_count][1] && $shell == $basis[$at_count][-1][0]) {
              push @{$basis[$at_count][-1][2]}, 
                   [$primitive,$exponent,$contractions];
            }
            else {
              push @{$basis[$at_count]}, 
                   [$shell,$type,[[$primitive,$exponent,$contractions]]];
            }
          }
          if (/TOTAL NUMBER OF BASIS SET SHELLS\s+=\s*(\d+)/) {
            $shells = $1;
            die "Problem with SHELLS\n" if $shells != $basis[-1][-1][0];
            last;
          }
        }
        #prn(@basis); exit;
        for (my $i=$#basis; $i>=0; $i--) {
          my $d = $basis[$i][1][0]-$basis[$i-1][-1][0]-1;
          $d = $basis[$i][1][0]-1 if $i == 0;
          my $n = $#{$basis[$i]};
          #print "($basis[$i][-1][0]-$basis[$i-1][1][0]-1)/$#{$basis[$i]}=$d/$n\n";
          die "$d%$n != 0\n" if $d%$n;
          foreach (1..$d/$n) {
            splice @basis, $i, 0, copy_at($basis[$i]);
            for (my $j=1; $j<=$n; $j++) {
              $basis[$i][$j][0] -= $n;
            }
          }
        }
        #prn(@basis); exit;
      }
      $basfun = $1 if m/NUMBER OF CARTESIAN GAUSSIAN BASIS FUNCTIONS =\s+(\d+)/;
      $electrons = $1 if m/NUMBER OF ELECTRONS\s+=\s+(\d+)/;
      $charge = $1 if m/CHARGE OF MOLECULE\s+=\s+(-?\d+)/;
      $mult = $1 if m/SPIN MULTIPLICITY\s+=\s+(\d+)/;
      $alpha = $1 if m/NUMBER OF OCCUPIED ORBITALS \(ALPHA\)\s+=\s+(\d+)/;
      $beta = $1 if m/NUMBER OF OCCUPIED ORBITALS \(BETA \)\s+=\s+(\d+)/;
      $N = $1 if m/TOTAL NUMBER OF ATOMS\s+=\s+(\d+)/;
      # from $CONTRL OPTIONS
      $scftyp = $1 if m/SCFTYP=\s*(\w+)/;
      $runtyp = $1 if m/RUNTYP=\s*(\w+)/;
      last if m/\$SYSTEM OPTIONS/;
    }
    
    die "Wrong NUMBER OF ATOMS\n" if $N != @basis or $N != @mol0;
    
    my @vectors_re;
    if ($scftyp eq 'MCSCF') {
      @vectors_re = ('MCSCF NATURAL ORBITALS', 'MCSCF OPTIMIZED ORBITALS');
    }
    else {
      @vectors_re = ('EIGENVECTORS', 'MOLECULAR ORBITALS')
    }
    my $optimized = '';
    while (<L>) {
      $optimized = " for $1" if /\b([\w\s]+\b) LOCATED/;
      if ($optimized && /COORDINATES OF ALL ATOMS ARE \(ANGS\)/) {
        <L>; <L>;
        while (<L>) {
          if (/^\s*(\w+)\s+($f)\s+($f)\s+($f)\s+($f)/) {
            push @mol1, [element(int $2),$3,$4,$5];
          }
          last if /^\s*$/;
        }
        #dd @mol1; exit;
      }
      foreach my $re (@vectors_re) {
        if (/^\s+$re$/) {
          <L>; <L>;
          if ($optimized && $scftyp eq 'UHF') {
            $_ = <L>;
            $re .= " (ALPHA SET)" if /ALPHA SET/;
            <L>;
            push @V_name, "$re$optimized - starting from line $.";
            my ($str,$mat) = readV($basfun);
            $mat->[0][0] = $V_name[-1];
            push @V, $str;
            push @V_matrix, $mat;
            #dd(@V_matrix); exit;
            
            if (/BETA SET/) {
              #warn "BETA SET\n";
              $re =~ s/ALPHA/BETA/;
              <L>;
              push @V_name, "$re$optimized - starting from line $.";
              my ($str,$mat) = readV($basfun);
              $mat->[0][0] = $V_name[-1];
              push @V, $str;
              push @V_matrix, $mat;
              #dd(@V_matrix); exit;
            }
          }
          else {
            push @V_name, "$re$optimized - starting from line $.";
            my ($str,$mat) = readV($basfun);
            $mat->[0][0] = $V_name[-1];
            push @V, $str;
            push @V_matrix, $mat;
            #dd(@V_matrix); exit;
          }
        }
      }
    }
    if ($scftyp eq 'UHF') {
      $V_name[0] =~ s/EIGENVECTORS/EIGENVECTORS (ALPHA SET)/;
      $V_matrix[0][0][0] =~ s/EIGENVECTORS/EIGENVECTORS (ALPHA SET)/;
      $V_name[1] =~ s/EIGENVECTORS/EIGENVECTORS (BETA SET)/;
      $V_matrix[1][0][0] =~ s/EIGENVECTORS/EIGENVECTORS (BETA SET)/;
    }
  }

  else {
    warn "Unknown type of file\n";
    exit;
  }
  close L;

  die "No vectors found\n" unless @V;
  my $vectors_found = $#V+1;
  warn "$vectors_found vectors found\n";
  for (my $i=0; $i<@V_name; $i++) {
    printf STDERR "%3d  %s\n", $i+1, $V_name[$i];
    
  }

  my @vector = parse_n($n,$#V+1);
  #print "@vector\n";
  foreach my $i (@vector) {
    $i--;
    print $V[$i];
    my $last_line = $1 if $V[$i] =~ /(.+)\n \$END/;
    my $nnn = substr $last_line, 2, 3;
    my $cnt = $V[$i] =~ tr/\n/\n/;
    my $i1 = ($cnt-2)/$nnn;
    my $i2 = ($nnn-1)*5 + (length($last_line)-5)/15;
    my $V_name_str = $V_name[$i] ? "($V_name[$i]) " : '';
    my $ii = $i+1;
    warn "Vector #$ii ${i1}x$i2 ${V_name_str}was extracted to stdout\n";
    if ($molden and $type eq 'out') {
      warn "Write vector #$ii to $name-$ii.mos (molden format)\n";
      my $geom = ($V_name[$i] =~ / for /) ? \@mol1 : \@mol0;
      write_mos($geom,\@basis,$V_matrix[$i],"$name-$ii.mos");
    }
    $i++;
  }
  #warn "Number(s) of extracted block(s) of vectors: @vector\n";
}

sub readV {
  my $basfun = shift;
  my @EV;
  while (<L>) {
    last if (/END OF .+? CALCULATION/ || /^ +-+$/ || /\*\*\*\*/);
    if (/^                  (\s+\d+)+$/) {
      #print; next;
      my @ind = split;
      $_ = <L>; 
      my @en = split;
      for (my $k=0; $k<@ind; $k++) {
        $EV[$ind[$k]][0][0] = $en[$k]
      }
      $_ = <L>;
      my @sym = split;
      for (my $k=0; $k<@ind; $k++) {
        $EV[$ind[$k]][0][1] = $sym[$k]
      }
      for (my $j=1; $j<=$basfun; $j++) {
        $_ = <L>;
        my @coef = split;
        my @fff = splice @coef, 0, 4;
        #my @coef = (split)[-@ind..-1];
        #print "$j @coef @ind\n";
        for (my $k=0; $k<@ind; $k++) {
          $EV[0][$j] = \@fff;
          $EV[$ind[$k]][$j] = $coef[$k];
        }
      }
    }
  }
  #dd @EV; 
  #print scalar(@EV), " ", scalar(@{$EV[3]}), "\n";
  #exit;
  my $EV = " \$VEC";
  for (my $i=1; $i<@EV; $i++) {
    for (my $j=1; $j<@{$EV[$i]}; $j++) {
      #my $count;
      if ($j%5 == 1) {
        $EV .= sprintf "\n%2d%3d", $i%100, (int($j/5)+1)%1000;
      }
      $EV .= sprintf "%15.8E", $EV[$i][$j];
    }
    #print "\n";
  }
  $EV .= "\n \$END\n";
  #dd \@EV; exit;
  return ($EV,\@EV);
}

sub parse_n {
  my ($n,$N) = @_;
  #warn "$n,  $N\n";
  $n =~ s/(-\d+)/$N+$1+1/ge;
  my @vector;
  foreach (split /,/, $n) {
    if (/(\d+)\.\.(\d+)/) { # Раскрываем интервалы (i-n)
		  my @a = $1<=$2 ? $1..$2 : reverse $2..$1;
		  push @vector, @a;
	  }
    else {push @vector, $_}
  } 
  #warn "@vector\n";
  die "$n out of range\n" if grep {$_<0 or $_>$N} @vector;
  return @vector;
}

sub element {
 ############################################################################
 ## Возвращает атомный номер по символу либо символ по номеру
 ## или undef при отсутствии элемента в таблице Менделеева
 ############################################################################
  # Таблица Менделеева
  my @ATOM = qw (XX
                 H                                                  He
                 Li Be                               B  C  N  O  F  Ne
                 Na Mg                               Al Si P  S  Cl Ar
                 K  Ca Sc Ti V  Cr Mn Fe Co Ni Cu Zn Ga Ge As Se Br Kr
                 Rb Sr Y  Zr Nb Mo Tc Ru Rh Pd Ag Cd In Sn Sb Te I  Xe
                 Cs Ba La
                         Ce Pr Nd Pm Sm Eu Gd Tb Dy Ho Er Tm Yb
                       Lu Hf Ta W  Re Os Ir Pt Au Hg Tl Pb Bi Po At Rn
                 Fr Ra Ac
                         Th Pa U  Np Pu Am Cm Bk Cf Es Fm Md No
									     Lr Rf Db Sg Bh Hs Mt Ds Rg
                );
  my $atom = shift;
  if ($atom=~/^\d+$/) {
    return undef if $atom > $#ATOM;
    return $ATOM[$atom];
  }
  $atom = ucfirst $atom;
  my %ATOM;
  @ATOM{@ATOM} = 0..$#ATOM;
  return $ATOM{$atom} if exists $ATOM{$atom};
  return undef;
}

sub copy_at {
  my @a;
  my @at = @{$_[0]};
  $a[0] = $at[0];
  for (my $i=1; $i<@at; $i++) {
    $a[$i] = [@{$at[$i]}];
  }
  return \@a;
}

sub write_mos {
  my ($mol,$bas,$vec,$file) = @_;
  #dd $mol;
  my $N = @$mol;
  open L, '>', $file or die "Can't write to $file: $!\n";
  print "[Molden Format]\n";
  print "[Atoms] Angs\n";
  for (my $i=0; $i<$N; $i++) {
    printf "%2s%6d%3d%13.6f%13.6f%13.6f\n", 
           $mol->[$i][0],
           $i+1, 
           element($mol->[$i][0]),
           $mol->[$i][1], 
           $mol->[$i][2],
           $mol->[$i][3];
  }
  #dd $bas->[0];
  print "[GTO]\n";
  my %lbl = (S=>'s',P=>'p',L=>'sp',D=>'d',F=>'f',G=>'g');
  my $print_mldn = sub {
    my $E = sprintf "%17.9E", $_[0];
    $E =~ s/E/D/;
    $E =~ s/\.//;
    $E =~ s/(...)$/sprintf "%+03d", $1+1/e;
    $E =~ s/(\d)/0.$1/;
    print "$E";
  };
  for (my $i=0; $i<$N; $i++) {
    printf "%2d 0\n", $i+1;
    for (my $j=1; $j<@{$bas->[$i]}; $j++) {
      #dd $bas->[$i][$j];
      printf " %-2s%4d 1.00\n", $lbl{$bas->[$i][$j][1]}, scalar(@{$bas->[$i][$j][2]});
      foreach my $aa (@{$bas->[$i][$j][2]}) {
        #print "$aa->[1] @{$aa->[2]}\n";
        $print_mldn->($aa->[1]);
        foreach (@{$aa->[2]}) {
          $print_mldn->($_);
        }
        print "\n";
      }
    }
    print "\n";
  }
  print "\n[MO]\n";
  #dd $vec;
  close L;
}
sub prn { # For debug BASIS SET
  my @basis = @_;
  foreach my $bb (@basis) {
    print "$bb->[0]\n";
    for (my $i=1; $i<@$bb; $i++) {
      print "  $bb->[$i][0],$bb->[$i][1] - ";
      foreach (@{$bb->[$i][2]}) {
        print "$_->[0] "
      }
      print "\n";
    }
    print "\n";
  }
}