User:Johantheghost/wp-refconvert.pl


#!/usr/bin/perl


#
# A perl script to convert Wikipedia {{ref}}-style references to use the
# <ref> feature.
#
# Usage:
#     wp-refconvert article.txt
# creates a new file called article-new.txt, containing the new version
# of the article.
#

# Strictures come first so that they cover everything that follows.
use strict;
use warnings;
use utf8;

# Treat all three standard streams as UTF-8 text.
binmode(STDIN,  ":utf8");
binmode(STDOUT, ":utf8");
binmode(STDERR, ":utf8");

# Program name, used as the prefix of the error messages.
my $prog = "wp-refconvert";


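######################################################################
# Global Data
######################################################################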

# Running counters:
#   $numRefs    simple {{ref|NAME}} references seen by addRef
#   $numNotes   notes seen by addNote
#   $fixedRefs  simple {{ref|NAME}} references rewritten by fixRef
my $numRefs   = 0;
my $numNotes  = 0;
my $fixedRefs = 0;

# Table of references, indexed by reference number minus one.  Each entry
# is a hash ref holding 'name', 'count' and 'usecount', plus 'text' once
# the matching note has been seen.
my @references;


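######################################################################
# Article Parsing
######################################################################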

# Scan the article in $file and build the reference table.
#
# Every {{ref...}} template found is handed to addRef().  On a line that
# contains {{note...}} templates, the first note and the text following
# the last one are handed to addNote().
#
# Dies if $file cannot be opened.
sub readArticle {
    my ( $file ) = @_;

    open(my $in, "<:utf8", $file)
        || die("$prog: can't open $file: $!\n");

    local($_);
    while (<$in>) {
        # Check for references in the line; and check for notes.  These
        # should be mutually exclusive.
        my @refs = m/\{\{ref[^}]+\}\}/g;
        foreach my $r (@refs) {
            addRef($r);
        }

        # The /c flag keeps pos() at the end of the last note once the
        # match finally fails, so the \G match below starts right there.
        my @notes = m/ *(\{\{note[^}]+\}\})/gc;
        if (scalar(@notes) > 0) {
            my ( $text ) = m/\G *(.*)$/;
            addNote($notes[0], $text);
        }
    }

    close($in);
}

# Record one occurrence of a reference template.
#
# $ref is the complete template text, in one of three forms:
#     {{ref|NAME}}                      numbered in order of appearance
#     {{ref_num|NAME|NUMBER}}           explicitly numbered
#     {{ref_label|NAME|NUMBER|THIRD}}   explicitly numbered; the third
#                                       field is captured but not used
#
# The first use of a number creates its entry in @references; each later
# use increments that entry's 'count'.
#
# Dies on an unrecognised template, on a number that is not a positive
# integer, or when a number is reused under a different name.
sub addRef {
    my ( $ref ) = @_;

    my ( $n, $k, $label );
    if (($n) = ( $ref =~ /^\{\{ref\|([^}|]+)\}\}$/)) {
        $k = ++$numRefs;
        $label = "";
        # printf "Simple: %s, %d, %s\n", $n, $k, $label;
    } elsif (($n, $k) = ( $ref =~ /^\{\{ref_num\|([^}|]+)\|([^}|]+)\}\}$/)) {
        $label = "";
        # printf "Num:   %s, %d, %s\n", $n, $k, $label;
    } elsif (($n, $k, $label) =
             ( $ref =~ /^\{\{ref_label\|([^}|]+)\|([^}|]+)\|([^}|]+)\}\}$/)) {
        # printf "Label: %s, %d, %s\n", $n, $k, $label;
    } else {
        die("$prog: unknown reference style \"$ref\"\n");
    }

    # An explicit number of zero or a non-numeric one would turn into a
    # negative array index below, so reject it here.
    my ( $num ) = ( $k =~ /^\s*([0-9]+)\s*$/ );
    if (!defined($num) || $num < 1) {
        die("$prog: bad reference number \"$k\" in \"$ref\"\n");
    }
    $k = $num;

    my $record = $references[$k - 1];
    if (!defined($record)) {
        $record = { 'name' => $n, 'count' => 1, 'usecount' => 0 };
        $references[$k - 1] = $record;
    } else {
        if ($record->{'name'} ne $n) {
            die(sprintf "$prog: ref mismatch: ref %d=%s; new ref=%s\n",
                        $k, $record->{'name'}, $n);
        }
        ++$record->{'count'};
    }
}

# Attach the text of a note to its reference.
#
# $note is the complete template text, either {{note|NAME}} or
# {{note_label|NAME|NUMBER|THIRD}}; $text is the note's body.  Notes are
# numbered in order of appearance, and an explicit NUMBER must agree with
# that sequence.
#
# Dies on an unrecognised template, on an out-of-sequence explicit
# number, when no reference carries the note's number, or when that
# reference has a different name.
sub addNote {
    my ( $note, $text ) = @_;

    my ( $n, $k, $label );
    if (($n) = ( $note =~ /^\{\{note\|([^}|]+)\}\}$/)) {
        $k = ++$numNotes;
        $label = "";
        # printf "Simple: %s, %d, %s\n", $n, $k, $label;
    } elsif (($n, $k, $label) =
             ( $note =~ /^\{\{note_label\|([^}|]+)\|([^}|]+)\|([^}|]+)\}\}$/)) {
        if ($k != ++$numNotes) {
            die("$prog: note number mismatch: seq = $numNotes, explicit = $k\n");
        }
        # printf "Label:  %s, %d, %s\n", $n, $k, $label;
    } else {
        die("$prog: unknown note style \"$note\"\n");
    }

    my $record = $references[$k - 1];
    if (!defined($record)) {
        die("$prog: note mismatch: no ref $k ($n)\n");
    }
    if ($record->{'name'} ne $n) {
        die(sprintf "$prog: note mismatch: ref %d=%s; note=%s\n",
                    $k, $record->{'name'}, $n);
    }

    $record->{'text'} = $text;
}


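######################################################################
# Article Editing
######################################################################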

# Write a converted copy of the article in $file to $output.
#
# Lines that start with "# {{note" (the old numbered note list) are
# dropped; a single <references/> tag is written where the first of them
# stood.  On every other line each {{ref...}} template is replaced by
# the result of fixRef().
#
# Dies if either file cannot be opened or the output cannot be flushed.
sub editArticle {
    my ( $file, $output ) = @_;

    my $doneRefs = 0;

    open(my $in, "<:utf8", $file)
        || die("$prog: can't open $file: $!\n");
    open(my $out, ">:utf8", $output)
        || die("$prog: can't create $output: $!\n");

    local($_);
    while (<$in>) {
        if (/^\# \{\{note/) {
            # Only the first note line produces output.
            if (!$doneRefs) {
                print $out "<references/>\n";
                ++$doneRefs;
            }
        } else {
            s/(\{\{ref[^}]+\}\})/fixRef($1)/ge;
            print $out $_;
        }
    }

    close($in);

    # Buffered write errors only surface when the handle is closed.
    close($out) || die("$prog: error writing $output: $!\n");
}

# Return the <ref> markup that replaces one reference template.
#
# $ref is the complete template text, in the same three forms that
# addRef() accepts.  Simple {{ref|NAME}} templates are numbered in order
# of appearance; the other two forms carry their own number.
#
# A reference used once becomes <ref>TEXT</ref>.  A reference used
# several times becomes <ref name="NAME">TEXT</ref> on its first use and
# <ref name="NAME"/> on every later use.
#
# Dies on an unrecognised template, on a number that is not a positive
# integer, or when no reference with that number was recorded.
sub fixRef {
    my ( $ref ) = @_;

    my ( $n, $k, $label );
    if (($n) = ( $ref =~ /^\{\{ref\|([^}|]+)\}\}$/)) {
        $k = ++$fixedRefs;
        $label = "";
    } elsif (($n, $k) = ( $ref =~ /^\{\{ref_num\|([^}|]+)\|([^}|]+)\}\}$/)) {
        $label = "";
        # printf "Num:   %s, %d, %s\n", $n, $k, $label;
    } elsif (($n, $k, $label) =
             ( $ref =~ /^\{\{ref_label\|([^}|]+)\|([^}|]+)\|([^}|]+)\}\}$/)) {
        ;
    } else {
        die("$prog: unknown reference style \"$ref\"\n");
    }

    # An explicit number of zero or a non-numeric one would turn into a
    # negative array index below, so reject it here.
    my ( $num ) = ( $k =~ /^\s*([0-9]+)\s*$/ );
    if (!defined($num) || $num < 1) {
        die("$prog: bad reference number \"$k\" in \"$ref\"\n");
    }
    $k = $num;

    my $record = $references[$k - 1];
    if (!defined($record)) {
        die("$prog: ref mismatch: no ref $k ($n)\n");
    }

    if ($record->{'count'} > 1) {
        # Shared reference: the full text goes on the first use only.
        if ($record->{'usecount'}++ == 0) {
            return sprintf '<ref name="%s">%s</ref>',
                           $record->{'name'}, $record->{'text'};
        } else {
            return sprintf '<ref name="%s"/>', $record->{'name'};
        }
    } else {
        return sprintf '<ref>%s</ref>', $record->{'text'};
    }
}


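######################################################################
# Diagnostics
######################################################################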

# Print the reference table to STDERR for debugging.
#
# A "## N refs; M notes" line is printed first when the two counters
# disagree.  Then one line is printed per reference number from 1 to
# $numRefs, showing its number, name, use count and note text.
sub dumpRefs {
    unless ($numNotes == $numRefs) {
        printf STDERR "## %d refs; %d notes\n", $numRefs, $numNotes;
    }

    my $slot = 0;
    while ($slot < $numRefs) {
        my $entry = $references[$slot];
        ++$slot;    # from here on $slot is the 1-based reference number
        printf STDERR "[%2d] %-12s (%2d) %s\n",
                      $slot,
                      $entry->{'name'},
                      $entry->{'count'},
                      $entry->{'text'};
    }
}


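######################################################################
# Main
######################################################################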

# Convert the article named by the first argument.
#
# The article is parsed with readArticle() and then rewritten with
# editArticle() into a new file.  The new name inserts "-new" before the
# extension (article.txt -> article-new.txt); a name without an
# extension gets ".new" appended instead.
#
# Returns the process exit status: 0 on success, 1 if no article was
# named.  Errors in the article itself are reported by die in the
# helpers.
sub main {
    my ( @args ) = @_;

    my $article = $args[0];
    if (!defined($article)) {
        print STDERR "Usage: $prog article.txt\n";
        return 1;
    }

    # First, parse the article.
    readArticle($article);

    # dumpRefs;

    my $newvers = $article;
    ($newvers =~ s:\.([^/]+)$:-new.$1:) || ($newvers .= ".new");
    printf STDERR "## edit %s -> %s\n", $article, $newvers;
    editArticle($article, $newvers);

    return 0;
}

# Script entry point: run main() on the command-line arguments and use
# its return value as the process exit status.
exit(main(@ARGV));