User:Irkm/BuildArchive/Tools/UserHistoryScript

Documentation
This is a little script i use to extract user names for crediting from article histories.

The script should run under any modern perl environment.

Usage: ./getAuthors.pl 

Example: ./getAuthors.pl "Build:W/D_Zealous_Decapitater"

Caveats
The script is not extensivly tested. It just works for me.

Edit histories longer than 500 entries are not handled correctly. The oldest entries are ignored. Since i have not yet encountered a build article with more than 500 entries this should not be a problem. I can fix this if neccessary.

Script

 * 1) !/usr/bin/perl
 * 2) Get list of contributors of a GuildWiki page
 * 3) Written by Irkm_Desmet
 * 4) Changes by Hhhippo (14 Apr 2007):
 * 5) - Sort contributors in order of their appearance on the history page
 * 6) - Removed skipping of minor editors (found a page creator who called that minor)
 * 7) - Some output cosmetics
 * 1) - Sort contributors in order of their appearance on the history page
 * 2) - Removed skipping of minor editors (found a page creator who called that minor)
 * 3) - Some output cosmetics

use warnings; use strict;

if ($#ARGV != 0) { usage; exit(0); }

my $pageTitle = $ARGV[0]; my $url = "http://gw.gamewikis.org/wiki?title=$pageTitle&action=history&limit=500"; my $content = getURL($url); my ($originalAuthor, $users) = extractUserNames($content);

print "\n\n== Credits ==\n"; print "Original author: ". "" . $originalAuthor . " \n"; print "Additional contributors: \n";

my $contribs=""; foreach (sort { $$users{$a}{number} <=> $$users{$b}{number} } keys %$users) { next if $_ eq $originalAuthor; $contribs = $contribs. "" . $_ . ",\n" } $contribs =~ s/,\n$/\n\n\n/s; print $contribs;

sub extractUserNames { my $content = shift;

my @lines = split(/\n/, $content); #print "Lines= $#lines\n";

my $count = 0; my $userno = 0;

# Skip leading lines while ($lines[$count] !~ /^\(cur\)/) { $count++; }

my %users; my $originalAuthor;

while ($lines[$count] =~ /^/) { #  if ($lines[$count] =~ / m<\/span>/) { #  # minor edit -> skip #  $count++; #   next; #  }    $lines[$count] =~ /\(.*?)\<\/span\>/; my $part = $1; #print "\n$part\n"; if ($part =~ /(.*?)<\/a>/) { my ($pagename1, $pagename2,$userlink, $username) = ($1,$2,$3,$4);

if (!exists($users{$username})) { $users{$username}{number} = $userno++; if (defined($pagename1)) { #print "Pagename1 = $pagename1, Userlink=$userlink, Username=$username\n"; $users{$username}{pagename} = $pagename1; $users{$username}{userlink} = $userlink; } else { #print "Pagename2 = $pagename2, Userlink=$userlink, Username=$username\n"; $users{$username}{pagename} = $pagename2; $users{$username}{userlink} = $userlink; }       }        # remember author, last in list is original author $originalAuthor = $username; } else { print "unparsed line: $part\n"; }   $count++; }   print "Contributors: $userno\n"; return $originalAuthor, \%users; }

sub usage { print "Usage: getAuthors \n"; print "Example: getAuthors \"Build:W/D_Zealous_Decapitater\"\n"; }

sub getURL {

use LWP::UserAgent; use URI;

my($url,$ret) = @_;

if(!defined $ret) { $ret = 5; }

my($ua) = LWP::UserAgent->new(env_proxy => 1,                                 keep_alive => 1,                                  timeout => 60,                                  # agent => "Mozilla/4.0 (compatible; MSIE 5.5; Windows 98; T312461)",                                 ); my($furl) = URI->new($url)->canonical; print "Getting URL : $url\n"; my($response) = $ua->get($furl); my($count); while(!$response->is_success) { $count++; warn "Error getting $furl, retrying ($count)."; sleep 5; if($count == $ret) { warn "Can't get $furl, aborting after $ret retries."; return undef; }   }

return $response->content; } __END__