#!/usr/bin/perl
# Reads the file(s) whose names are given on the command line (or STDIN when
# none is given) and prints one line per document: the document name
# (<DOCNO>), the separator "***", and the document text (<TEXT>).
#
# Expected input layout, one tag per line:
#   <DOC>
#   <DOCNO> some-id </DOCNO>
#   <TEXT>
#   ...free text lines...
#   </TEXT>
#   </DOC>
use strict;
use warnings;

# Copy every document read from $in to $out as "NAME***TEXT\n".
#
#   $in   readable filehandle (or glob reference) supplying the input lines
#   $out  writable filehandle (or glob reference) receiving the output lines
#
# Text from several <TEXT> sections of the same document is concatenated.
# Runs of spaces are squeezed to one and leading/trailing spaces are removed.
# A document without a <DOCNO> line is printed with an empty name.
sub convert_stream {
    my ($in, $out) = @_;

    my $in_text  = 0;     # true while between <TEXT> and </TEXT>
    my $name     = '';    # DOCNO of the current document
    my $text     = '';    # lines of the <TEXT> section being read
    my $all_text = '';    # all finished <TEXT> sections of the document

    while (my $line = <$in>) {
        chomp $line;

        if ($line =~ m{<DOCNO>\s*(.+?)\s*</DOCNO>}) {
            $name = $1;
        }
        elsif ($line =~ m{</DOC>}) {
            # End of document: normalise the spacing and emit the record.
            $all_text =~ s/ +$//;
            $all_text =~ s/^ +//;
            $all_text =~ s/ +/ /g;
            print {$out} $name, "***", $all_text, "\n";

            # Reset per-document state so nothing leaks into the next one.
            $name     = '';
            $all_text = '';
        }
        elsif ($line =~ m{<TEXT>}) {
            $in_text = 1;
            $text    = '';
        }
        elsif ($line =~ m{</TEXT>}) {
            $in_text = 0;
            $all_text .= $text . " ";
        }
        elsif ($in_text) {
            # Join the lines of a section with single spaces.
            $text .= $line . " ";
        }
    }

    return;
}

# Run as a script: ARGV gives the usual <> behaviour (files named on the
# command line, or STDIN). Skipped when this file is loaded by other code.
convert_stream(\*ARGV, \*STDOUT) unless caller;