Новосибирский институт органической химии им. Н.Н. Ворожцова СО РАН Лаборатория изучения механизмов органических реакций |
|||||||||||||||||||||||||||||||||||||||||||||||||||||||||
#!/usr/bin/perl -ws
# sort_by_smi
#
# Groups xyz files by the SMILES string that Open Babel ("obabel") reports
# for them and prints one line per SMILES, largest group first.  By default
# the file names of a group are compressed into a brace mask built from
# their common prefix and suffix, e.g. "conf{1,2,3}.xyz".
#
# Options are parsed by perl's -s switch (see the shebang line), which sets
# the package variables declared with "our" below:
#   -h, -help       print the usage text
#   -no_mask        print the file names of a group separated by spaces
#   -ref=file.xyz   print only the group whose SMILES equals that of file.xyz

use strict;
use warnings;
use List::Util qw(max min);

our ($h, $help, $no_mask, $ref);

# Run the script only when executed directly, so that the helper subs can be
# loaded (require/do) without starting obabel.
main() unless caller;

# Returns the usage text shown for -h/-help and when no input file is given.
sub usage {
    (my $program = $0) =~ s/^.*[\/\\]//;    # strip the directory part
    return <<"END_USAGE";
Usage: $program [options] xyz-files

Dependencies: perl, obabel

Options:
    -no_mask        print full filenames (via space)
    -ref=file.xyz   print filenames having smiles coincided with that of file.xyz
END_USAGE
}

# Runs "obabel FILES -o smi -" and returns its standard output as a list of
# lines.  The list form of open() hands every file name to obabel as one
# argument without going through a shell, so names containing spaces,
# parentheses or other shell metacharacters are safe.
# NOTE(review): list-form pipe open needs perl >= 5.22 on Windows.
# Dies if obabel cannot be started.
sub run_obabel {
    my @files = @_;
    open my $pipe, '-|', 'obabel', @files, '-o', 'smi', '-'
        or die "Cannot run obabel: $!\n";
    my @lines = <$pipe>;
    close $pipe;    # exit status deliberately ignored, as with the former backticks
    return @lines;
}

# Entry point: converts the files named in @ARGV, groups them by SMILES and
# prints the groups sorted by descending size.
sub main {
    if ($h || $help) {
        print usage();
        return;
    }
    if (!@ARGV) {
        print STDERR usage();
        return;
    }

    # The reference file goes last so that its SMILES is the last output line.
    my @inputs = @ARGV;
    push @inputs, $ref if $ref;

    print "Run `obabel \@ARGV -o smi -`\n";
    my @babel_out = run_obabel(@inputs);
    #print @babel_out; print "\n";

    my $ref_smi;
    if ($ref) {
        # Assumes obabel emits exactly one line for the reference file and
        # that it is the last one -- TODO confirm for multi-structure files.
        my $ref_line = pop @babel_out;
        ($ref_smi) = split ' ', $ref_line if defined $ref_line;
        die "obabel produced no SMILES for reference file '$ref'\n"
            unless defined $ref_smi;
    }

    # SMILES => list of titles (file names) reported with that SMILES.
    my %files_of;
    for my $line (@babel_out) {
        my ($smi, $file) = split ' ', $line;
        next unless defined $smi;    # blank line
        push @{ $files_of{$smi} }, defined $file ? $file : '';
    }

    # Width of the SMILES column.
    my $max_len = max(0, map { length $_ } keys %files_of);

    # Largest group first; ties are broken by SMILES so the order is stable.
    my @ordered = sort {
        @{ $files_of{$b} } <=> @{ $files_of{$a} } or $a cmp $b
    } keys %files_of;

    for my $smi (@ordered) {
        next if $ref and $smi ne $ref_smi;
        printf "%-${max_len}s ", $smi;
        if ($no_mask) {
            print "@{ $files_of{$smi} }\n";
        }
        else {
            # mask() already ends its result with a newline, so masked
            # groups are followed by a blank line (original output format).
            print mask(@{ $files_of{$smi} }), "\n";
        }
    }
    return;
}

# Compresses a list of names into "PREFIX{middle1,middle2,...}SUFFIX\n",
# where PREFIX and SUFFIX are the longest common beginning and ending of all
# names.  A single name is returned unchanged (plus the newline).
sub mask {
    my @names = @_;
    return "\n" unless @names;
    return "$names[0]\n" if @names == 1;

    # The suffix is removed first; the prefix is then searched only in what
    # is left, so the two never overlap.
    my $end   = delete_eq_end(\@names);
    my $begin = delete_eq_begin(\@names);
    return $begin . '{' . join(',', @names) . '}' . "$end\n";
}

# Removes the longest common suffix from every string of the referenced
# array (in place) and returns that suffix.  Returns '' and changes nothing
# when the array has fewer than two elements or there is no common suffix.
sub delete_eq_end {
    my $aref = shift;
    return '' if @$aref <= 1;

    # Bounding the scan by the shortest string guarantees termination even
    # when all strings are identical.
    my $shortest = min(map { length $_ } @$aref);
    my $first    = $aref->[0];
    my $n        = 0;
    CHAR: while ($n < $shortest) {
        my $ch = substr $first, -($n + 1), 1;
        for my $name (@$aref) {
            last CHAR if substr($name, -($n + 1), 1) ne $ch;
        }
        $n++;
    }
    return '' unless $n;

    $_ = substr($_, 0, length($_) - $n) for @$aref;
    return substr $first, -$n;
}

# Removes the longest common prefix from every string of the referenced
# array (in place) and returns that prefix.  Returns '' and changes nothing
# when the array has fewer than two elements or there is no common prefix.
sub delete_eq_begin {
    my $aref = shift;
    return '' if @$aref <= 1;

    my $shortest = min(map { length $_ } @$aref);
    my $first    = $aref->[0];
    my $n        = 0;
    CHAR: while ($n < $shortest) {
        my $ch = substr $first, $n, 1;
        for my $name (@$aref) {
            last CHAR if substr($name, $n, 1) ne $ch;
        }
        $n++;
    }
    return '' unless $n;

    $_ = substr($_, $n) for @$aref;
    return substr $first, 0, $n;
}

1;
|||||||||||||||||||||||||||||||||||||||||||||||||||||||||