#!/usr/bin/perl
# -*- mode: cperl; indent-tabs-mode: nil; tab-width: 3; cperl-indent-level: 3; -*-
#
# cg-sort: filter that sorts and de-duplicates the readings of each cohort
# in a CG stream.
#
# Input is read from the file named by the first non-option argument, or from
# STDIN when there is none. Output goes to STDOUT. Lines are classified as:
#   - cohort line:     starts with "<...>" at column 0
#   - reading:         leading whitespace followed by a quoted string
#   - deleted reading: ';' + whitespace followed by a quoted string
#   - anything else inside a cohort is kept as "trail" text and emitted after
#     that cohort's readings; lines before the first cohort pass through as-is.
use warnings;
use strict;
use utf8;
BEGIN {
   $| = 1;
   binmode(STDIN, ':encoding(UTF-8)');
   binmode(STDOUT, ':encoding(UTF-8)');
}
use open qw( :encoding(UTF-8) :std );

use Getopt::Long;
Getopt::Long::Configure('bundling');
Getopt::Long::Configure('no_ignore_case');
my %opts = ();
GetOptions(\%opts, ('weight|w:s', 'mapping|m:s', 'reverse|r', 'first|1', 'help|?'));

# Prints the usage text to the currently selected output handle.
sub print_help {
   print <<'XOUT';
Usage: cg-sort [OPTIONS]

Pipe a CG stream through this to sort and unique the readings of each cohort.

Options:
 -?, --help       outputs this help
 -w, --weight     sorts by a numeric tag; defaults to W
 -m, --mapping    sorts mapping tags with given prefix; defaults to @
 -r, --reverse    reverses the sort order
 -1, --first      only keep the first reading

XOUT
}

if (defined $opts{'help'}) {
   print_help();
   exit(0);
}

# Name of the weight tag; the option value is optional, so an empty value
# (plain -w) keeps the default.
my $W = 'W';
if (exists($opts{weight}) && length($opts{weight})) {
   $W = $opts{weight};
}
# Prefix that identifies mapping tags; plain -m keeps the default.
my $M = '@';
if (exists($opts{mapping}) && length($opts{mapping})) {
   $M = $opts{mapping};
}

# State of the cohort currently being collected. Readings are stored as hash
# keys so that identical lines collapse into one.
my $in_cohort = 0;
my %readings = ();
my %deleted = ();
my $trail = '';

# Returns the weight found on a reading line, written either as <W:1.5> or
# as W:1.5 (with W being the configured tag name); 0.0 when no weight tag
# is present.
sub _weight_of {
   my ($line) = @_;
   if ($line =~ m/(?:^|\s)<\Q$W\E:([\d.]+)>(?:\s|$)/ || $line =~ m/(?:^|\s)\Q$W\E:([\d.]+)(?:\s|$)/) {
      return $1;
   }
   return 0.0;
}

# Sort comparator (uses the package variables $a and $b set by sort):
# orders readings by ascending weight, falling back to a string comparison
# of the whole line when the weights are equal.
sub sort_weight {
   my $wa = _weight_of($a);
   my $wb = _weight_of($b);
   if ($wa == $wb) {
      return $a cmp $b;
   }
   return $wa <=> $wb;
}

# Prints the given reading lines in sorted order.
#   - with --mapping: the mapping tags inside each line are sorted first
#   - with --weight:  lines are ordered via sort_weight, otherwise as strings
#   - with --reverse: the final order is reversed
#   - with --first:   only the first line of the final order is printed
# Does nothing when called with an empty list. The arguments are not modified.
sub print_sorted_readings {
   my @lines = @_;
   if (!@lines) {
      return;
   }

   if (exists($opts{mapping})) {
      foreach my $line (@lines) {
         # The prefix is user input, so it is quoted to be matched literally.
         my @tags = ($line =~ m/ (\Q$M\E\S+)/g);
         @tags = sort @tags;
         # Put the sorted tags back one-for-one into the positions where
         # mapping tags were found. This keeps every tag exactly once even
         # when other tags sit between the mapping tags.
         $line =~ s/ \Q$M\E\S+/' ' . shift(@tags)/ge;
      }
      # Readings that differed only in the order of their mapping tags are
      # identical now; keep a single copy of each.
      my %seen = ();
      @lines = grep { !$seen{$_}++ } @lines;
   }

   if (exists($opts{weight})) {
      @lines = sort sort_weight @lines;
   }
   else {
      @lines = sort @lines;
   }

   if (exists($opts{reverse})) {
      @lines = reverse @lines;
   }

   if (exists($opts{first})) {
      print $lines[0];
   }
   else {
      print join('', @lines);
   }
}

# Flushes the cohort collected so far: readings, then deleted readings, then
# the trailing non-reading lines; afterwards resets the cohort state.
# Does nothing when no cohort is open.
sub print_sorted {
   if (!$in_cohort) {
      return;
   }

   print_sorted_readings(keys(%readings));
   print_sorted_readings(keys(%deleted));
   print $trail;

   %readings = ();
   %deleted = ();
   $trail = '';
   $in_cohort = 0;
}

# Read from the named file when one is given, otherwise from STDIN. A fresh
# lexical handle is used for the file so that STDIN itself is left untouched.
my $fh;
if (defined $ARGV[0]) {
   open($fh, '<', $ARGV[0]) or die "cg-sort: cannot open '$ARGV[0]': $!\n";
}
else {
   $fh = \*STDIN;
}

while (my $line = <$fh>) {
   if ($line =~ m/^"<.+?>"/) {
      # A new cohort starts: emit the previous one before echoing this line.
      print_sorted();
      $in_cohort = 1;
   }
   elsif ($in_cohort) {
      if ($line =~ m/^\s+".+?"/) {
         $readings{$line} = 1;
      }
      elsif ($line =~ m/^;\s+".+?"/) {
         $deleted{$line} = 1;
      }
      else {
         $trail .= $line;
      }
      next;
   }
   print $line;
}
# Emit the last cohort, which has no following cohort line to trigger it.
print_sorted();

if (defined $ARGV[0]) {
   close($fh);
}