#!/bin/perl -w
#######################################################################
#
# This tool reads a file tagged with the tstmt.dtd (used for the
# quran, http://metalab.unc.edu/xml/examples/religion/quran/quran.xml),
# extracts the title number from the bktlong and bktshort elements and
# creates both a num attribute on the title and a num element inside
# the title element.
#
#   <bktlong>1. The Opening</bktlong>
# becomes
#   <bktlong num="1"><num>1</num>The Opening</bktlong>
#
# Usage: the XML is read from the file named by the first argument,
# or from STDIN when no argument is given; the modified document is
# printed on STDOUT.
#
#######################################################################

use strict;
use warnings;
use XML::DOM;

my $dom = XML::DOM::Parser->new;                  # create parser
my $doc;                                          # the dom object

# Test for definedness/length rather than truth so that a file
# literally named "0" is still treated as a file name.
if ( defined $ARGV[0] && length $ARGV[0] ) {
    $doc = $dom->parsefile( $ARGV[0] );           # parse the named file
}
else {
    $doc = $dom->parse( \*STDIN );                # parse standard input
}

foreach my $tag ( 'bktlong', 'bktshort' ) {
    my $nodes = $doc->getElementsByTagName($tag);
    my $n     = $nodes->getLength;
    for my $i ( 0 .. $n - 1 ) {
        process_title( $doc, $nodes->item($i) );
    }
}

print $doc->toString;

exit;

# process_title( $doc, $title )
#
# Title handler: splits the leading number off the text of a title
# element and records it twice, as a "num" attribute on the title and
# as a <num> child element inserted before the remaining title text.
#
#   $doc   - the document object, used to create the new nodes
#   $title - the title element (bktlong or bktshort) to rewrite
#
# A title is left untouched (with a warning on STDERR) when its first
# child is missing or is not a text node, or when its text does not
# start with a number; previously such titles either crashed the
# script or had their text replaced by an undefined value.
#
# Returns nothing.
sub process_title {
    my ( $doc, $title ) = @_;

    # Only the first child is examined; it must be a text node.
    my $title_pcdata = $title->getFirstChild;
    unless ( defined $title_pcdata
        && $title_pcdata->getNodeType == XML::DOM::TEXT_NODE() )
    {
        warn 'skipping <', $title->getNodeName,
            ">: first child is not text\n";
        return;
    }

    # Separate the number (with its optional dot and any following
    # whitespace) from the rest of the title.  /s lets the rest span
    # several lines, and \z keeps it verbatim up to the very end.
    my ( $title_no, $title_text ) =
        ( $title_pcdata->getData =~ /\A(\d+)\.?\s*(.*)\z/s );
    unless ( defined $title_no ) {
        warn 'skipping <', $title->getNodeName,
            ">: title does not start with a number\n";
        return;
    }

    $title->setAttribute( num => $title_no );           # create attribute num

    my $num = $doc->createElement('num');               # create element num
    $title->insertBefore( $num, $title_pcdata );        # insert it in title
    $num->appendChild( $doc->createTextNode($title_no) );   # number as text

    $title_pcdata->setData($title_text);                # set title new text

    return;
}