#!/bin/csh -f
#
# segment_marker - preprocess a TDC text and put segment tags in it.
#
# The current version includes the segment tags of both Segmenter and
# TextTiling.
# NOTE(review): the original header read "Segment tag is" with the tag
# literal missing (it looks like it was stripped from the comment); check
# r_seg1 / r_seg2 for the tag actually emitted.
#
# Output: <TDC_file>.seg will contain the TDC text with segment tags.
#
# Usage: segment_marker TDC_file
#
# Note: csh does not stop on a failing command, so a failure in one stage
# is not detected here; later stages still run on whatever was produced.

# Refuse to run without an input file: otherwise $1 expands to nothing,
# the tools receive too few arguments and the result lands in ".seg".
if ($#argv < 1) then
    echo "Usage: segment_marker TDC_file"
    exit 1
endif

if (! -r "$1") then
    echo "segment_marker: cannot read input file: $1"
    exit 1
endif

# Commands
set BLANKLINE = '/a/kaml4/usr2/staff/ychali/project/tools/blankline'
set TILE      = '/a/kaml4/usr2/staff/ychali/TextTiling/tile'
set R_SEG1    = '/a/kaml4/usr2/staff/ychali/project/tools/r_seg1'
set ECHOS     = '/usr/bin/echo .START'
set ECHO      = '/usr/bin/echo '
set LAYOUT    = '/a/kaml4/usr2/staff/ychali/segmenter/segmenter/bin/util/layoutRecognizer -m wsj'
set TERMER    = '/a/kaml4/usr2/staff/ychali/segmenter/segmenter/bin/termer'
set SEGMENTER = '/a/kaml4/usr2/staff/ychali/segmenter/segmenter/bin/segmenter'
set R_SEG2    = '/a/kaml4/usr2/staff/ychali/project/tools/r_seg2'
set CAT       = cat
set RM        = rm

# Variables
# Temporary files live in the current directory.  The shell's process id
# ($$) is appended so that concurrent runs in the same directory do not
# overwrite each other's intermediate files.
set TMP1 = temp1.$$
set TMP2 = temp2.$$
set TMP3 = temp3.$$
set TMP4 = temp4.$$
set TMP5 = temp5.$$

# Exit status of the script; set to 1 if the run is interrupted.
set rc = 0

# On interrupt, jump to the cleanup code instead of leaving temp files.
onintr interrupted

# call blankline: input file -> TMP1
$BLANKLINE "$1" $TMP1

# call tile: TextTiling output for TMP1 -> TMP2
$TILE $TMP1 > $TMP2

# call r_seg1: reads the tile output on stdin, given TMP1 and TMP3 as
# arguments (presumably writes the TextTiling-tagged text to TMP3 --
# TODO confirm against r_seg1)
$R_SEG1 $TMP1 $TMP3 < $TMP2

# call echo: TMP2 is reused to hold a ".START" line plus two empty lines
($ECHOS; $ECHO; $ECHO) > $TMP2

# call cat: prepend that header to the blankline output -> TMP4
$CAT $TMP2 $TMP1 > $TMP4

# call layout: layoutRecognizer output -> TMP4.sgml
$LAYOUT $TMP4 > ${TMP4}.sgml

# call termer and segmenter
$TERMER ${TMP4}.sgml | $SEGMENTER > $TMP5

# call r_seg2: reads the segmenter output on stdin, given TMP3 and the
# final output file <TDC_file>.seg as arguments
$R_SEG2 $TMP3 "${1}.seg" < $TMP5

goto cleanup

interrupted:
set rc = 1

cleanup:
# call rm: -f because after an interrupt some temp files may not exist yet
$RM -f $TMP1 $TMP2 $TMP3 $TMP4 ${TMP4}.sgml $TMP5
exit $rc