t2/az5112

Download the raw code.

#!/usr/bin/env perl6

#This is perl6 version 2011.12-18-ga7fd89e built on parrot 3.11.0 revision RELEASE_3_11_0

sub MAIN( Int $N ) {
    my $aa = 1; my $aa3 = 1;
    my $bb = 1; my $bb3 = 1;

    my %sums;
    my %mltpl_sums;
    my $max_sum_found = 0;

    while %mltpl_sums.elems < $N or $aa3 < $max_sum_found { ##(1)
        while $bb3 < $aa3 {
            if %sums{$aa3 + $bb3} {
                if not %mltpl_sums{ $aa3 + $bb3 } {
                    %mltpl_sums{ $aa3 + $bb3 } = %sums{ $aa3 + $bb3 };
                };
                %mltpl_sums{ $aa3 + $bb3 } ~= ",$aa,$bb";
                $max_sum_found max= ( $aa3 + $bb3 );
            }
            else {
                # Using arrays instead of a CSV string would add
                # a considerable speed penalty (~50% for 100 numbers).
                %sums{ $aa3 + $bb3 } = "$aa,$bb";
            }
            $bb += 1;
            $bb3 = $bb * $bb * $bb;
        }
        $aa += 1;
        $aa3 = $aa * $aa * $aa;
        $bb = $bb3 = 1;

        # %sums .= grep: *.key < $aa3; # remove useless values ##(3)
    }

    for %mltpl_sums.keys.sort.list.splice( 0, $N ) -> $total { ##(2)
        print "$total";
        for %mltpl_sums{$total}.split( "," ) -> $x, $y {
            print " = $x ** 3 + $y ** 3"    
        }
        say;
    };
}

=begin pod

=head1 Apocryphal

Here is a revelation - hashes suck!

We take it for granted than an array in perl (or any other scripting language)
is more powerful than a C array or a C++ vector.  It combines the functionality
of arrays, lists, queues, dequeues, etc. You can push, and pop, shift, unshift,
splice, etc. It is not efficient as C++ data structures but it is more convenient
-- a fair trade off, one that you would expect from a scripting language.

The same should hold for associative containers. But it doesn't. C++ offers an
unordered_map and a map (plus their multi equivalents). An unordered_map can be
thought of as the closest equivalent of a perl hash.  It is harder to use than
a map and it did not even exist in the standard until recently (i.e. until C++0x
came out). A map, unlike an unordered_map, is ordered.

So, why does perl6 use bucket-based hashes instead of an RB-tree the way C++ map
does? Because hashes are unordered I had to store the value of the largest key
in a separate variable ##(1), sort the keys, before printing out the result ##(2)
and there is no way to cheaply remove smallest hash keys ##(3) that are no longer
needed, or remove extraneous entries from mltpl_sums ##(4).

So, on one hand Perl hashes have to be fast since they also happen to masquerade
as structs (and RB tree is slower than hash buckets), on the other, the lack of
a decent associative container hinders algorithm development, as this little
example shows. I'd like to be able to pop or unshift from a hash, check what's
at its front or back, splice it, or do .keys( from => 20, to => 40 ).

=head2 Ye Olde Runes

 #include <algorithm>
 #include <cstdlib>
 #include <iostream>
 #include <map>
 #include <sstream>
 #include <vector>
 using namespace std;

 typedef unsigned long long ulolo;
 typedef map< ulolo, vector< pair< ulolo, ulolo > > > solution_t;

 void say( pair< ulolo const, vector< pair< ulolo, ulolo > > > s ) {
    cout << s.first;
    for( int i = 0; i < s.second.size(); ++i ) {
        cout << " = " << s.second[i].first << " ** 3 + ";
        cout << s.second[i].second << " ** 3 ";
    }
    cout << endl;   
 }

 int main( int argc, char* argv[] )
 {
    if( argc != 2 ) { exit( 1 ); };

    stringstream s( argv[1] );
    int N; s >> N;

    ulolo aa = 1; ulolo aa3 = 1;
    ulolo bb = 1; ulolo bb3 = 1;

    map< ulolo, pair< ulolo, ulolo > > sums;
    solution_t dbl_sums;

    while( dbl_sums.size() < N or aa3 < dbl_sums.end()->first ) {
        while( bb3 < aa3 ) {
            if( sums.find( aa3 + bb3 ) == sums.end() ) {
                sums[ aa3 + bb3 ] = make_pair( aa, bb );
            }
            else {
                dbl_sums[ aa3 + bb3 ].push_back( make_pair( aa, bb ) );
                if( dbl_sums[ aa3 + bb3 ].size() == 1 ) {
                    dbl_sums[aa3 + bb3 ].push_back( sums[aa3 + bb3] );
                }
                if( dbl_sums.size() > N ) {
                    dbl_sums.erase( --dbl_sums.end() ); //##(4)
                }
            }
            bb += 1; bb3 = bb * bb * bb;
        }
        aa += 1; aa3 = aa * aa * aa;
        sums.erase( sums.begin(), sums.lower_bound( aa3 ));     
        bb = bb3 = 1;
    }

    for_each( dbl_sums.begin(), dbl_sums.end(), say );
 }

=end pod

Correctness

This code finds all sums of cubes, but prints them in the wrong order: sorted lexicographically instead of numerically. (Why? Because hash keys by default are strings, and so .keys.sort will have string-sorting semantics.)

It seems to handle the case of more than two ways to write a sum of cubes, even though excessive resource usage prevented us from testing it.

Consistency

The layout is consistent.

Clarity of intent

The code is easy to follow, though the inner while-loop could have benefitted from attaching a name to the expression $aa3 + $bb3, which is used seven times in that loop body.

Could the choice of $aa and $bb instead of $a and $b be a remnant from Perl 5-think, where you're never ever supposed to name your variables $a and $b? Well, there's no risk of that in Perl 6.

Algorithmic efficiency

The code performs quite well for the first 200 sums of cubes, and then becomes slow and memory-hungry.

Idiomatic use of Perl 6

Nice usage of max=. Superstitious parentheses on the right-hand side, though.

Some parts of the code have been done more idiomatic though. For example the clunky

if not %mltpl_sums{ $aa3 + $bb3 } {
    %mltpl_sums{ $aa3 + $bb3 } = %sums{ $aa3 + $bb3 };
};

Could easily have been written as

%mltpl_sums{ $aa3 + $bb3 } //= %sums{ $aa3 + $bb3 };

Whereas ||= captures the meaning exactly, //= has equivalent effect and is less surprising.

Brevity

The code is neither chatty nor overly compressed.

Apocryphal

We take the comment block as a friendly invitation to discuss the containers available in Perl 6.

az5112 rightly deplores the lack of several container data structures in Perl 6. It is something we must fix over time. We believe that both Rakudo and Niecza are now mature enough to support more containers easily as user-supplied modules. The flexibility of Perl 6 makes it easy to add container types with intuitive interfaces.

However, it doesn't follow from the lack of some container data structures that the Hash datatype needs fixing. A core data structure should be general as possible while still being fast. The vast majority of use cases justify the current factoring of having Hash being an unordered map. Making it more general for the benefit of a tiny fraction of use cases feels like the wrong tradeoff.

The use cases for a hash with preserved insertion order are rather limited. The problem at hand rather calls for a data structure that allows for arbitrary access but can also be used as a priority queue.

To demonstrate that it's not so hard to come up with custom data structures, we present a t2 solution which uses a combined Hash and Binary Heap for queuing duplicate sums of cubes. A balanced tree might be a nicer choice overall, but this works fine.

class HashHeap {
    has %!hash handles <at_key>;
    has @!heap = [Any];
    has &.by = -> $x { $x };

    method Bool() { @!heap > 1 };
    method root() { @!heap[1] };

    method insert($elem) {
        @!heap.push($elem);
        %!hash{$elem} = $elem;
        self!reheap_up(@!heap.end);
    }

    method !reheap_up(Int $i is copy) {;
        while $i > 1 && &!by(@!heap[$i div 2]) after &!by(@!heap[$i]) {
            @!heap[$i div 2, $i] = @!heap[$i, $i div 2];
            $i div= 2;
        }
    }

    # remove root
    method shift() {
        fail "Cannot shift from empty heap" if @!heap < 2;
        my $old = @!heap[1];
        @!heap[1] = @!heap.pop;
        self!reheap_down(1);
        %!hash.delete($old);
        $old;
    }

    method !reheap_down(Int $idx) {
        return if 2 * $idx > @!heap.end;
        my Int $i = 2 * $idx;
        $i++ if 2 * $idx + 1 <= @!heap.end
            && &!by(@!heap[2 * $idx + 1]) before &!by(@!heap[2 * $idx]);
        if &!by(@!heap[$idx]) after &!by(@!heap[$i]) {
            @!heap[$idx, $i] = @!heap[$i, $idx];
            self!reheap_down($i);
        }
    }
}

class Cube {
    has $.value handles <Str Numeric Real Int>;
    has @.constructions;
    multi method gist(Cube:D:) {
        join ' = ', $.value, @.constructions;
    }
}

sub MAIN($target-count = 500) {
    my %seen;
    my $solutions = HashHeap.new(by => *.value);
    my $count = 0;
    loop (my int $x = 2; True; $x = $x + 1) {
        my int $x3 = $x ** 3;
        loop (my int $y = $1; $y <= $x; $y = $y + 1) {
            my int $n = $x3  + $y ** 3;
            if %seen{$n} {
                if $solutions{$n} {
                    $solutions{$n}.constructions.push: "$x ** 3 + $y ** 3";
                } else {
                    my $c = Cube.new(
                        value         => $n,
                        constructions => ["$x ** 3 + $y ** 3",  %seen{$n}],
                    );
                    $solutions.insert($c);
                }
            }
            %seen{$n} = "$x ** 3 + $y ** 3";
        }
        while $solutions && $solutions.root <= $x3 {
            say $solutions.shift;
            return if ++$count == $target-count;
        }
    }
}

This calculates the first 500 sums of cubes in less than a minute on a current (2012.01-54-gacbec86) version of Rakudo.