#!/usr/bin/perl $comments = <0.99) ? 0.99 : winProb winProb = (winProb<0.01) ? 0.01 : winProb Thus performance rating is computed as: myRating = oppRating + (850*winScore - 0.5) Converges on the ratings as follows: 1. ratings of all players start at 0 2. ratings of all players calculated using performance rating formula 3. stop if average rating is not different between current and calculated ratings 4. calculated ratings copied to current ratings 5. go to step 2 Calculate each players rating using the performance rating formula where oppRating is the weighted average of the opponents current rating winScore is the weighted average of the game scores where score = 1 if the player won, 0.5 for draw, 0 for if player lost weight for each game is such that games from the most recent month have a weight of 1 and games from 48 months ago have a weight of 0; linearly decreasing wieght = (48 - monthsAgo)/48 add 3 draw games each with a weight of 1 against a 2300 rated player add 4 draw games each with a weight of 1 against a player with rating of oppRating subtract 13 points from rating for each game fewer than 7 games if (gamesPlayed < 7) then myRating -= 13*(7 - gamesPlayed) Adjust all ratings after they have converged so that the average rating is 2300. When predicting outcomes set rating of players with no ratings yet to 2300 add 45 rating points to the player playing white EOM # read in the training data @d = readData("data/training_data.csv"); # use only the last 48 months to calculate the players ratings $lm = 100; $nm = 48; @g = selectData(\@d, $lm-$nm, $lm); %r = convRatings(\@g); # adjust the ratings so that the average is 2300 $ar = avgRating(\%r); %r = adjRating(\%r, 2300 - $ar); showRatings(\%r); # read in the test data @d = readData("data/test_data.csv"); # make predictions on the test data using the calculated ratings @p = predict(\@d, \%r); $res = showData(\@p); print $res . "\n"; open(FH, ">submit.csv"); print FH qq("Month #","White Player #","Black Player #","Score"\n); print FH $res; close FH; sub showData{ my($ga) = @_; my($g, %g, $res, $pr); foreach $g (@$ga){ %g = %$g; $pr = sprintf("%.2f", $g{r}); $res .= "$g{w},$g{fp},$g{sp},$pr\n"; } return $res; } sub predict{ my($ga, $rh) = @_; my(%r, @ga, $g, $r1, $r2, $rdiff); %r = %$rh; @ga = @$ga; foreach $g (@ga){ $r1 = $r{$g->{fp}}; $r2 = $r{$g->{sp}}; if ($r1 eq ''){ $r1 = 2300; } if ($r2 eq ''){ $r2 = 2300; } $rdiff = $r1 - $r2; $rdiff += firstMoveAdvantage($r1, $r1); $g->{r} = expScore($rdiff); } return @ga; } sub firstMoveAdvantage{ my($r1, $r2) = @_; return 45; } sub convRatings{ my($g) = @_; my($i, %r, $ar, $ar1, @g); @g = @$g; $adj = 1; $i = 0; while(1){ %r = calcRatings(\@g, \%r, $g[-1]->{w}+1); $ar1 = $ar; $ar = avgRating(\%r); print "$ar\n"; if ($ar == $ar1){ last; } $i += 1; if ($i >= 200){ last; } } return %r; } sub showRatings{ my($r) = @_; my(%r, $k, $v, $mxr, $mnr, $av, $ct); $mnr = 999999999999; %r = %$r; while(($k, $v) = each(%r)){ if ($v > $mxr){ $mxr = $v; } if ($v < $mnr){ $mnr = $v; } $av += $v; $ct += 1; print "$k $v\n"; } $av = $av/$ct; print "max = $mxr min = $mnr avg = $av num = $ct\n"; } sub selectData{ my($da, $l, $h) = @_; my (@d, $d, %d, @g); @d = @$da; foreach $d (@d){ %d = %$d; if (($d{w} <= $h) && ($d{w}>$l)){ push(@g, $d); } } return @g; } sub adjRating{ my($r, $adj) = @_; my(%r, $k, $v); %r = %$r; while(($k, $v) = each(%r)){ $r{$k} = $v + $adj; $r{$k} = int($r{$k} + 0.5); } return %r; } sub spotRating{ my($r, $s) = @_; my(%r, $i, $k, $v, @r); %r = %$r; $i = 0; while(($k, $v) = each(%r)){ $r[$i] = $v; } @r = sort(@r); @r = reverse(@r); if ($s > $#r){ $s = -1; } return $r[$s]; } sub avgRating{ my($r) = @_; my(%r, $k, $v, $ar, $c); %r = %$r; while(($k, $v) = each(%r)){ $ar += $v; $c += 1; } if ($c > 0){ $ar = $ar/$c; } return $ar; } # cw is the current month index sub calcRatings{ my($ga, $r, $cw) = @_; my(@g, %r, $g, %g, $w, %or, %sc, %ct, $k, $v, $sop, $pr); %r = %$r; @g = @$ga; foreach $g (@g){ %g = %$g; # determine the weight of this game $w = (48 - ($cw - $g{w} - 1))/48.0; if ($w > 1){ $w = 1; } if ($w <= 0){ next; } # add to the opponent rating and score of the first player $or{$g{fp}} += $r{$g{sp}} * $w; $sc{$g{fp}} += $g{r} * $w; $ct{$g{fp}} += 1 * $w; $gc{$g{fp}} += 1; # add to the opponent rating and score of the second player $or{$g{sp}} += $r{$g{fp}} * $w; $sc{$g{sp}} += (1.0 - $g{r}) * $w; $ct{$g{sp}} += 1 * $w; $gc{$g{sp}} += 1; } while(($k, $v) = each(%or)){ # add 4 draws against average opponent $sop = $or{$k}/$ct{$k}; $or{$k} += 4*$sop; $sc{$k} += 4*0.5; $ct{$k} += 4; # add 3 draws against average player $or{$k} += 3*2300; $sc{$k} += 3*0.5; $ct{$k} += 3; $pr = perfRating($or{$k}/$ct{$k}, $sc{$k}/$ct{$k}); # rounding the rating to nearest int helps converge faster $r{$k} = int($pr + 0.5); # reduce the rating of new players with less than 7 games if ($gc{$k} < 7){ $r{$k} -= (7-$gc{$k})*13; } } return %r; } sub readData{ my($f) = @_; my($l, $w, $fp, $sp, $r, @d); local(*FH); open(FH, "<$f"); foreach $l (){ my($p); chomp $l; if ($l !~ m/^\d/){ next; } ($w, $fp, $sp, $r) = split(/,/, $l); if ($w ne ''){ $p->{w} = $w; $p->{fp} = $fp; $p->{sp} = $sp; $p->{r} = $r; push(@d, $p); } } return @d; } sub perfRating{ my($avgOppRating, $percentageScore) = @_; my($ratingDiff); $ratingDiff = $avgOppRating + 850*($percentageScore - 0.5); return $ratingDiff; } sub expScore{ my($ratingDiff) = @_; my($expScore); $expScore = ($ratingDiff/850) + 0.5; if ($expScore > .99){ $expScore = 0.99; } if ($expScore < .01){ $expScore = 0.01; } return $expScore; }