Новосибирский институт органической химии им. Н.Н. Ворожцова СО РАН

Лаборатория изучения механизмов органических реакций

sort_by_smi


#!/usr/bin/perl -ws

# Command-line switches. They are filled in by perl's -s option (see the
# shebang line): -h / -help, -no_mask and -ref=file.xyz.
our ($h,$help,$no_mask,$ref);
if ($h || $help) {
  (my $program = $0) =~ s/^.*[\/\\]//;
  print "Usage: $program [options] xyz-files
Dependencies: perl, obabel
Options:
-no_mask  print full filenames (via space)
-ref=file.xyz  print filenames having smiles coincided with that of file.xyz
";
  exit;
}

# Without input files there is nothing to convert.
die "No xyz-files given; run with -h for usage\n" unless @ARGV;

# The reference file is appended last, so its SMILES is the last output line.
push @ARGV, $ref if $ref;

print "Run `obabel \@ARGV -o smi -`\n";
# List-form pipe open hands the arguments to obabel directly, without a shell,
# so file names containing spaces or shell metacharacters are passed intact.
open(my $obabel, '-|', 'obabel', @ARGV, '-o', 'smi', '-')
  or die "Cannot run obabel: $!\n";
my @babel_out = grep { /\S/ } <$obabel>;   # drop blank lines
close $obabel
  or warn "obabel exited with status " . ($? >> 8) . "\n";
#print @babel_out;
print "\n";

# Declared separately from the conditional assignment: "my ... if COND" has
# undefined behaviour according to perlsyn.
my $ref_smi;
if ($ref) {
  die "obabel produced no output, cannot determine reference smiles\n"
    unless @babel_out;
  $ref_smi = (split ' ', pop @babel_out)[0];
}

# Group titles (file names) by SMILES string: smiles => [file, file, ...].
my %h_smi;
foreach my $s (@babel_out) {
  # Limit 2 keeps the whole title, even when it contains spaces.
  my ($smi, $file) = split ' ', $s, 2;
  $file = '' unless defined $file;   # line with a SMILES but no title
  $file =~ s/\s+\z//;                # strip the line ending
  push @{$h_smi{$smi}}, $file;
}

# Width of the SMILES column.
my $max_len = 0;
foreach my $key (keys %h_smi) {
  my $len = length $key;
  $max_len = $len if $max_len < $len;
}

# Largest groups first; ties are ordered by SMILES so the output is stable
# from run to run.
foreach my $smi (sort { @{$h_smi{$b}} <=> @{$h_smi{$a}} or $a cmp $b } keys %h_smi) {
  next if ($ref and $smi ne $ref_smi);
  printf "%-*s  ", $max_len, $smi;
  if ($no_mask) {
    print "@{$h_smi{$smi}}\n";
  }
  else {
    print mask(@{$h_smi{$smi}}), "\n";
  }
}

# Collapse a list of names into one brace pattern of the form
# "prefix{x,y,z}suffix", followed by a newline.
# Arguments: the names (the caller's list is copied, not modified).
# Returns: the pattern string; when exactly one name is left it is returned
# as is, followed by a newline.
sub mask {
  my @names = @_;
  # The helpers strip the shared part from @names in place; the suffix is
  # removed before the prefix.
  my $suffix = delete_eq_end(\@names);
  my $prefix = delete_eq_begin(\@names);
  if (@names != 1) {
    return sprintf "%s{%s}%s\n", $prefix, join(',', @names), $suffix;
  }
  return "$names[0]\n";
}
# Strip the longest suffix shared by all strings and return it.
# Argument: reference to an array of strings; the strings are shortened
# in place.
# Returns: the common suffix ('' when there is none, or when the array has
# fewer than two elements, in which case the array is left untouched).
# The result is the same in scalar and list context.
sub delete_eq_end {
  my $aref = shift;
  return '' if @$aref < 2;

  my $first = $aref->[0];
  my $common = length $first;   # length of the shared suffix found so far
  foreach my $s (@$aref) {
    # The comparison never runs past the shorter string, so identical or
    # exhausted strings cannot make the scan loop forever.
    my $limit = length($s) < $common ? length($s) : $common;
    my $n = 0;
    $n++ while $n < $limit
      and substr($s, -1 - $n, 1) eq substr($first, -1 - $n, 1);
    $common = $n;
    last unless $common;
  }
  # substr($str, -0) would return the whole string, so bail out first.
  return '' unless $common;

  my $end = substr $first, -$common;
  $_ = substr($_, 0, length($_) - $common) foreach @$aref;
  return $end;
}
# Strip the longest prefix shared by all strings and return it.
# Argument: reference to an array of strings; the strings are shortened
# in place.
# Returns: the common prefix ('' when there is none, or when the array has
# fewer than two elements, in which case the array is left untouched).
sub delete_eq_begin {
  my $aref = shift;
  return '' if @$aref < 2;

  my $first = $aref->[0];
  my $common = length $first;   # length of the shared prefix found so far
  foreach my $s (@$aref) {
    # The comparison never runs past the shorter string, so identical or
    # exhausted strings cannot make the scan loop forever.
    my $limit = length($s) < $common ? length($s) : $common;
    my $n = 0;
    $n++ while $n < $limit
      and substr($s, $n, 1) eq substr($first, $n, 1);
    $common = $n;
    last unless $common;
  }
  return '' unless $common;

  my $begin = substr $first, 0, $common;
  $_ = substr($_, $common) foreach @$aref;
  return $begin;
}