#!/usr/bin/perl -ws our ($h,$help,$cell,$cell_corners,$move_to_cell_center,$exclude_initial, $all_inside_cell); if ($h || $help) { (my $program = $0) =~ s/^.*[\/\\]//; print "Usage: $program [options] *.cif Dependencies: perl Convert cif to MDL xyz format (filename.cif to filename.xyz) The program prints filename to sdtout and warns (to stderr) if fractional coordinates block of cif-file contains not exactly one molecule. Options: -cell=MxNxK fill unit cell by symmetry operations and then multiply the cell M,N,K times by a,b,c directions (initial unit cell places in the centre of supercell) -cell=N equal -cell=NxNxN -cell equal -cell=1x1x1 (only pack unit cell) -move_to_cell_center move molecules included in unit cell to the cell center -exclude_initial f.e. for -cell=3x3x3 make 26 unit cells (without central) -cell_corners add dummies XX in the unit cell corners -all_inside_cell place all atoms given in cif-file inside the unit cell (as obabel does, not a whole molecule is obtained). This option is too little tested. The program takes fractional coordinates 'as is' without trying to grow molecules or eliminate disorder. So, it doesn't operate correctly with the symmetric molecules (not a whole molecules are obtained). A sign of disorder or not-a-whole-molecule is warning 'Formulas are not coincided' (however, occasionally this warning is caused by an error in _chemical_formula_sum). "; exit; } #use Data::Dump 'pp'; my $num = qr/\d+(?:\.\d+)?/; FILE: foreach my $file (@ARGV) { (my $name = $file) =~ s/\.cif//i; print "$name\n"; my ($formula,$alpha,$beta,$gamma,$Z,$a,$b,$c,@frac); my @loops; open L, '<', $file or die "Can't open $file: $!\n"; LLL: while () { s/^\s+//; m/^_chemical_formula_sum\s+(.+)/ and $formula = $1; m/^_cell_angle_alpha\s+(\S+)/ and $alpha = $1; m/^_cell_angle_beta\s+(\S+)/ and $beta = $1; m/^_cell_angle_gamma\s+(\S+)/ and $gamma = $1; m/^_cell_formula_units_Z\s+(\S+)/ and $Z = $1; m/^_cell_length_a\s+(\S+)/ and $a = $1; m/^_cell_length_b\s+(\S+)/ and $b = $1; m/^_cell_length_c\s+(\S+)/ and $c = $1; #print "$c\n"; if (m/^loop_/) { my @loop; while () { s/^\s+//; if (/^_/) { chomp; push @{$loop[0]}, $_; } else { chomp; push @loop, $_; last; } } while () { s/^\s+//; if (/^_/ || /^loop_/) { push @loops, \@loop; #pp @loop; redo LLL; } elsif (eof || /^\s*$/) { chomp; push @loop, $_; push @loops, \@loop; #pp @loop; redo LLL; } else { chomp; push @loop, $_; } } } } close L; foreach ($a,$b,$c,$alpha,$beta,$gamma) { s/\(.+//; unless ($_) { warn "$file: Not cif?\n"; next FILE; } } #warn "$a,$b,$c,$alpha,$beta,$gamma\n"; my %formula; if ($formula) { $formula =~ s/^'(.+)'$/$1/; $formula =~ s/([A-Za-z])(?=\s|$)/${1}1/g; %formula = map {/([A-Za-z]+)($num)/ && (ucfirst lc $1,$2)} split ' ', $formula; #pp %formula; exit; } else { warn "$file: No _chemical_formula_sum\n"; } #pp @loops; my @symmop; foreach my $loop (@loops) { #pp $loop; if (grep {/_symmetry_equiv_pos/ || /_space_group_symop/} @{$loop->[0]}) { my $head = shift @$loop; foreach (@$loop) { s/^\s*\d+\s+//; s/^'(.+)'$/$1/; s/\s+//g; tr/XYZ/xyz/; #s/(?<=^|,)\+//g; s/(^|,)\K\+//g; } #pp $loop; exit; @symmop = @$loop; } if (grep {/_atom_site_fract/} @{$loop->[0]}) { #pp $loop; my $head = shift @$loop; my $N = @$head; my @body = map {split} @$loop; my @table; for (my $i=0; $i<@body; $i++) { $table[int($i/$N)][$i%$N] = $body[$i]; } if (@{$table[-1]} != $N) { warn "$file: Somewhat bad in loop_ where fractional coordinates. The number of values in the body must be a multiple of the number of tags in the header.\n"; next FILE; } #pp $head; exit; my ($nlabel,$nsymbol,$nx,$ny,$nz); for (my $i=0; $i<@$head; $i++) { $head->[$i] eq '_atom_site_label' and $nlabel = $i; $head->[$i] eq '_atom_site_type_symbol' and $nsymbol = $i; $head->[$i] eq '_atom_site_fract_x' and $nx = $i; $head->[$i] eq '_atom_site_fract_y' and $ny = $i; $head->[$i] eq '_atom_site_fract_z' and $nz = $i; } if ( !defined($nlabel) && !defined($nsymbol) ) { warn "$file: Label tag in headers of loop_ where fractional coordinates?\n"; next FILE; } for (my $i=0; $i<@table; $i++) { my $atom; if (defined $nsymbol) { $atom = $table[$i][$nsymbol]; } else { ($atom = $table[$i][$nlabel]) =~ s/[^A-Za-z].*//; } my ($fx,$fy,$fz) = @{$table[$i]}[$nx,$ny,$nz]; unless ($atom || $fx || $fy || $fz) { warn "$file: Somewhat bad in headers of loop_ where fractional coordinates\n"; next FILE; } foreach ($fx,$fy,$fz) { s/\(.*//; } push @frac, [ucfirst lc $atom,$fx,$fy,$fz]; } } # else { # warn "$file: No fractional coordinates\n"; # next FILE; # } } #pp @frac; # Compare atoms in fractional coordinates with _cheical_formula_sum my %formula_frac; foreach my $val (@frac) { $formula_frac{$val->[0]}++; } if (!%formula_frac) { warn "$file: No fractional coordinates\n"; next FILE; } if (%formula) { my ($f1,$f2); foreach my $k (sort by_Hill keys %formula) { $f1 .= "$k$formula{$k} "; } foreach my $k (sort by_Hill keys %formula_frac) { $f2 .= "$k$formula_frac{$k} "; } if ($f1 ne $f2) { my $mult; foreach my $m (2..6) { (my $f = $f1) =~ s/($num)/$1*$m/ge; if ($f eq $f2) { $mult = $m; last; } } if ($mult) { warn "$file: Fractional coordinates contains $mult molecules \n"; } else { warn "$file: Formulas are not coincided (${f1}in _chemical_formula_sum and ${f2}in frac. coord.)\n"; } } } else { warn "$file: No _chemical_formula_sum\n"; } if ($all_inside_cell) { foreach my $v (@frac) { foreach (@{$v}[1..3]) { $_ += 1 if $_<0; $_ -= 1 if $_>=1; } } } if ($cell) { if ($cell =~ /^(\d+)$/) { $cell = "${1}x${1}x${1}"; } # Pack unit cell my @cell_frac; # my @cell_frac = @frac; foreach my $op (@symmop) { #print "$op\n"; #next if $op eq 'x,y,z'; my @frac1 = symop($op, @frac); transpose_into_cell(@frac1); push @cell_frac, @frac1; } move_to_cell_center(@cell_frac) if ($move_to_cell_center && !$all_inside_cell); #print_frac(@cell_frac); @frac = @cell_frac; # Myltiply unit cell my ($a,$b,$c) = split 'x', $cell; my $n_cell_frac = @cell_frac; my @mult_frac = @frac; foreach my $m (mult_ar($a)) { foreach my $v (@frac) { push @mult_frac, [ $v->[0], $v->[1]+$m, $v->[2], $v->[3] ]; } } @frac = @mult_frac; foreach my $m (mult_ar($b)) { foreach my $v (@frac) { push @mult_frac, [ $v->[0], $v->[1], $v->[2]+$m, $v->[3] ]; } } @frac = @mult_frac; foreach my $m (mult_ar($c)) { foreach my $v (@frac) { push @mult_frac, [ $v->[0], $v->[1], $v->[2], $v->[3]+$m ]; } } splice @mult_frac,0,$n_cell_frac if $exclude_initial; @frac = @mult_frac; } if ($cell_corners) { push @frac, ['XX',0,0,0], ['XX',1,0,0], ['XX',0,1,0], ['XX',0,0,1], ['XX',1,1,0], ['XX',1,0,1], ['XX',0,1,1], ['XX',1,1,1], ['XX',0.01,0,0], ['XX',0,0.01,0], ['XX',0,0,0.01], ['XX',-0.01,0,0], ['XX',0,-0.01,0], ['XX',0,0,-0.01]; } my $xyz = frac2xyz(\@frac,$a,$b,$c,$alpha,$beta,$gamma); write_molden($xyz,"$name.xyz"); } sub frac2xyz { my ($frac,$a,$b,$c,$alpha,$beta,$gamma)=@_; my ($sa,$sb,$sg) = map {sin($_/57.29578)} ($alpha,$beta,$gamma); my ($ca,$cb,$cg) = map {cos($_/57.29578)} ($alpha,$beta,$gamma); #print "$sa,$sb,$sg, $ca,$cb,$cg\n"; my @xyz = ({}); foreach (@$frac) { my ($at,$x,$y,$z) = @$_; my $X = $a*$x + $b*$cg*$y + $c*$cb*$z; my $Y = $b*$sg*$y + $c*($ca-$cb*$cg)/$sg*$z; my $Z = $c*sqrt(1-$ca**2-$cb**2-$cg**2+2*$ca*$cb*$cg)/$sg*$z; push @xyz, [$at,$X,$Y,$Z]; } return \@xyz; } sub write_molden { my $oldfh; if (@_ > 1 && !ref($_[-1])) { my $file = pop @_; open F, '>', $file or do {warn "Can't write to $file: $!\n"; return}; $oldfh = select F; } foreach my $mol (@_) { my $N = $#{$mol}; print " $N\n"; print " Energy $mol->[0]{Energy}" if $mol->[0]{Energy}; print " Symmetry $mol->[0]{Symmetry}" if $mol->[0]{Symmetry}; print "\n"; for (my $i=1; $i<=$N; $i++) { printf " %-2s %12.8f %12.8f %12.8f\n", @{$mol->[$i]}; } } if ($oldfh) { close F; select $oldfh; } } sub move_to_cell_center { my ($Cx,$Cy,$Cz); foreach my $v (@_) { $Cx += $v->[1]; $Cy += $v->[2]; $Cz += $v->[3]; } ($Cx,$Cy,$Cz) = map {$_/@_} ($Cx,$Cy,$Cz); foreach my $v (@_) { #printf "%2s%8.3f%8.3f%8.3f\n", @$v; $v->[1] -= ($Cx-0.5); $v->[2] -= ($Cy-0.5); $v->[3] -= ($Cz-0.5); } return 1; } sub print_frac { foreach my $v (@_) { printf "%2s%8.3f%8.3f%8.3f\n", @$v; } print "\n"; return 1; } sub symop { my $op = shift; my @frac = @_; $op =~ s/([xyz])/\$$1/g; my @ops = split ',', $op; #print "@ops\n"; my @frac1; foreach my $val (@frac) { my ($at,$x,$y,$z) = @$val; my ($x1,$y1,$z1); $x1 = eval "$ops[0]"; warn $@ if $@; $y1 = eval "$ops[1]"; warn $@ if $@; $z1 = eval "$ops[2]"; warn $@ if $@; #printf "%2s%8.3f%8.3f%8.3f\n", $at,$x1,$y1,$z1; push @frac1, [$at,$x1,$y1,$z1]; } return @frac1; } sub center { my ($x,$y,$z); foreach my $v (@_) { $x += $v->[1]; $y += $v->[2]; $z += $v->[3]; } return map {$_/@_} ($x,$y,$z); } sub transpose_into_cell { my @frac = @_; my ($cx,$cy,$cz) = center(@frac); #printf "%8.3f%8.3f%8.3f ", $cx,$cy,$cz; my ($sx,$sy,$sz) = (0,0,0); $cx<0 and $sx = 1; $cy<0 and $sy = 1; $cz<0 and $sz = 1; $cx>1 and $sx = -1; $cy>1 and $sy = -1; $cz>1 and $sz = -1; #print "$sx,$sy,$sz\n"; if ($sx != 0 || $sy != 0 || $sz != 0) { #print_frac(@frac); foreach (@frac) { @$_ = ($_->[0], $_->[1]+$sx,$_->[2]+$sy,$_->[3]+$sz); } #print_frac(@frac); } #printf "%8.3f%8.3f%8.3f\n", center(@frac); } sub mult_ar { my $n = shift; my @nnn = (); foreach (1..$n-1) { push @nnn, $_%2 ? $_/2+0.5 : -$_/2; } return @nnn; } sub by_Hill { return -1 if uc($a) eq 'C' and uc($b) ne 'C'; return 1 if uc($a) ne 'C' and uc($b) eq 'C'; return -1 if uc($a) eq 'H' and uc($b) ne 'H'; return 1 if uc($a) ne 'H' and uc($b) eq 'H'; uc($a) cmp uc($b); }