#!/bin/perl -w
#######################################################################
# #
# This tool reads a file tagged with the tstmt.dtd (used for the #
# quran: http://metalab.unc.edu/xml/examples/religion/quran/quran.xml), #
# extracts the title number from bktlong and bktshort elements and #
# creates both a num attribute for the title and a num element #
# inside the title element. #
# <bktlong>1. The Opening</bktlong> becomes #
# <bktlong num="1"><num>1</num>The Opening</bktlong> #
# #
#######################################################################
use strict;
use XML::DOM;
my $dom = new XML::DOM::Parser; # create parser
my $doc; # the dom object
if( $ARGV[0]) { $doc= $dom->parsefile( $ARGV[0]); } # create the dom object
else { $doc= $dom->parse( \*STDIN); }
foreach my $tag ('bktlong', 'bktshort')
{ my $nodes= $doc->getElementsByTagName ( $tag);
my $n = $nodes->getLength;
for (my $i = 0; $i < $n; $i++)
{ process_title( $doc, $nodes->item ($i)); }
}
print $doc->toString;
exit;
sub process_title # title handler
{ my ( $doc, $title)= @_;
my $title_pcdata= $title->getFirstChild; # get first text element
my $title_text= $title_pcdata->getData; # get it's data
my $title_no;
($title_no, $title_text)= # separate num and the
( $title_text=~ /\A(\d+)\.?\s*(.*)\Z/); # rest of the title
$title->setAttribute( num => $title_no); # create attribute numm
my $num= $doc->createElement( 'num'); # create element num
$title->insertBefore( $num, $title_pcdata); # insert it in title
my $num_pcdata= $doc->createTextNode( $title_no); # create the text
$num->appendChild( $num_pcdata); # insert it in num
$title_pcdata->setData( $title_text); # set title new text
}