package Data::Mining::Apriori;

use 5.010001;
use strict;
use warnings;
no if $] >= 5.017011, warnings => 'experimental::smartmatch';

our $VERSION = 0.06;

my $self;

$" = ', ';
$| = 1;
$SIG{'INT'} = \&stop;

sub new{
	my $type = shift;
	my $class = ref($type)||$type;
	$self = {
		totalTransactions => undef,
		minSupport => undef,
		minConfidence => undef,
		output => undef,
		messages => undef,
		itemsKeyDescription => undef,
		itemsKeyTransactions => undef,
		quantityPossibleRules => undef,
		frequentItemset => undef,
		recursively => 0,
		associationRules => undef,
		itemsetSize => undef,
		rule => 0
	};
	@{$self->{frequentItemset}}=();
	bless($self,$class);
	return $self;
}

sub validate_data{
	($self->{totalTransactions}=~/^\d+$/)
		or die("Error: \$self->{totalTransactions} invalid value!\n");
	($self->{minSupport}=~/^\d{1,3}\.?\d*$/
	&&($self->{minSupport}>=0&&$self->{minSupport}<=100))
		or die("Error: \$self->{minSupport} invalid value!\n");
	($self->{minConfidence}=~/^\d{1,3}\.?\d*$/
	&&($self->{minConfidence}>=0&&$self->{minConfidence}<=100))
		or die("Error: \$self->{minConfidence} invalid value!\n");
	($self->{itemsKeyDescription})
		or die("Error: \$self->{itemsKeyDescription} is not defined!\n");
	($self->{itemsKeyTransactions})
		or die("Error: \$self->{itemsKeyTransactions} is not defined!\n");
}

sub generate_rules{
	$self->validate_data;
	my @itemsetCandidateSizeOne = keys(%{$self->{itemsKeyDescription}});
	if($self->{messages}){
		my $quantityItems = scalar(@itemsetCandidateSizeOne);
		$self->{quantityPossibleRules} = ((3**$quantityItems)-(2**($quantityItems+1))+1);
		print "\n$quantityItems items, $self->{quantityPossibleRules} possible rules";
	}
	$self->{recursively} = 1;
	$self->association_rules_itemset_size(\@itemsetCandidateSizeOne, 2);
}

sub association_rules_itemset_size{
	my @itemsetCandidateSizeOne;
	my $itemsetSizeLimit;
	if($self->{recursively}){
		@itemsetCandidateSizeOne = @{$_[1]};
		$itemsetSizeLimit = $_[2];
		$self->{itemsetSize} = $_[2];
	}
	else{
		$self->validate_data;
		(scalar(@_)==2)
			or die("Error: Invalid number of arguments! It is expected a number representing the size of itemset!\n");
		@itemsetCandidateSizeOne = keys(%{$self->{itemsKeyDescription}});
		((scalar(@itemsetCandidateSizeOne)>=$_[1])&&($_[1]>=2))
			or die("Error: quantity of items should be greater or equal the size of itemset and size of the itemset greater or equal to 2!\n");
		$itemsetSizeLimit = $_[1];
		$self->{itemsetSize} = $_[1];
	}
	my @newItemsetCandidateSizeOne = ();
	if($self->{messages}){
		print "\n\nItemset size ", $self->{itemsetSize}, ", ", scalar(@itemsetCandidateSizeOne), " items ";
		print "\nProcessing... ";
	}
	for(my$itemsetSize=2;$itemsetSize<=$itemsetSizeLimit;$itemsetSize++){
		my $code=<<'CODE';
my @itemsetCandidate=();
my %itemsetSubset;
for(my$k=0;$k<=$#itemsetCandidateSizeOne;$k++){
	push @itemsetCandidate,$itemsetCandidateSizeOne[$k];
CODE
		for(my$i=2;$i<=$itemsetSize;$i++){
			$code.=<<'CODE';
for(my$l=0;$l<=$#itemsetCandidateSizeOne;$l++){
	if($itemsetCandidateSizeOne[$l] ~~ @itemsetCandidate){
		next;
	}
	push @itemsetCandidate,$itemsetCandidateSizeOne[$l];
CODE
		}
		$code.=<<'CODE';
for(my$subsetSize=0;$subsetSize<$#itemsetCandidate;$subsetSize++){
	my @subsetLeft=();
	my @subsetRight=();
	for(my$m=0;$m<=$subsetSize;$m++){
		push @subsetLeft,$itemsetCandidate[$m];
	}
	@subsetLeft = sort @subsetLeft;
	@subsetRight = grep!($_~~@subsetLeft),@itemsetCandidate;
	@subsetRight = sort @subsetRight;
	if("@subsetRight" ~~ @{$itemsetSubset{"@subsetLeft"}}){
		next;
	}
	push @{$itemsetSubset{"@subsetLeft"}},"@subsetRight";
	my @transactionsAssociation = @{$self->{itemsKeyTransactions}{$subsetLeft[0]}};
	for(my$item=1;$item<=$#subsetLeft;$item++){
		@transactionsAssociation = grep $_ ~~ @transactionsAssociation,@{$self->{itemsKeyTransactions}{$subsetLeft[$item]}};
	}
	my $supportAssociationLeft = scalar(@transactionsAssociation);
	if($supportAssociationLeft == 0){
		next;
	}
	for(my$item=0;$item<=$#subsetRight;$item++){
		@transactionsAssociation = grep $_ ~~ @transactionsAssociation,@{$self->{itemsKeyTransactions}{$subsetRight[$item]}};
	}
	my $supportAssociationRight = scalar(@transactionsAssociation);
	if($supportAssociationRight == 0){
		next;
	}
	my $support = sprintf("%.2f",(($supportAssociationRight/$self->{totalTransactions})*100));
	if($support >= $self->{minSupport}){
		my $confidence = sprintf("%.2f",(($supportAssociationRight/$supportAssociationLeft)*100));
		if($confidence >= $self->{minConfidence}){
			$self->{rule}++;
			@{$self->{associationRules}{$self->{rule}}{"R$self->{rule}"}{rule}} = ("{ @subsetLeft } => { @subsetRight }", $support, $confidence);
			@{$self->{associationRules}{$self->{rule}}{"R$self->{rule}"}{items}} = ();
			push @{$self->{associationRules}{$self->{rule}}{"R$self->{rule}"}{items}},@subsetLeft;
			my@items=grep!($_~~@newItemsetCandidateSizeOne),@subsetLeft;
			push @newItemsetCandidateSizeOne,@items;
			push @{$self->{associationRules}{$self->{rule}}{"R$self->{rule}"}{items}},@subsetRight;			
			@items=grep!($_~~@newItemsetCandidateSizeOne),@subsetRight;
			push @newItemsetCandidateSizeOne,@items;			
		}
	}
}
CODE
		for(my$j=2;$j<=$itemsetSize;$j++){
			$code.=<<'CODE';
	pop @itemsetCandidate;
}
CODE
		}
		$code.=<<'CODE';
	pop @itemsetCandidate;
}
CODE
		if($itemsetSize==$itemsetSizeLimit){
			eval $code;
			die("Error: $@\n") if $@;
			if($self->{messages}){
				print "\nFrequent itemset: { @newItemsetCandidateSizeOne }, ", scalar(@newItemsetCandidateSizeOne), " items ";
			}
			if($self->{associationRules}){
				@{$self->{frequentItemset}}=@newItemsetCandidateSizeOne;
				$self->output;
			}
		}
	}
	$itemsetSizeLimit += 1;
	if((scalar(@newItemsetCandidateSizeOne)>=$itemsetSizeLimit)&&$self->{recursively}){
		$self->{associationRules} = undef;
		$self->association_rules_itemset_size(\@newItemsetCandidateSizeOne, $itemsetSizeLimit);
	}
}

sub stop{
	if($self->{messages}){
		print "\nStopping... ";
		$self->output if $self->{associationRules};
		print "\nExit? (Y/N): ";
		my $exit = <STDIN>;
		chomp($exit);
		if($exit =~ /^y$/i){
			exit;
		}
		else{
			print "\nProcessing... ";
		}
	}
	else{
		$self->output if $self->{associationRules};
		exit;
	}
}

sub output{
	if($self->{output}){
		if($self->{output}==1){
			$self->file;
		}
		elsif($self->{output}==2){
			$self->excel;
		}
	}
}

sub file{
	if($self->{messages}){
		print "\nExporting to file \"output_itemset_size_$self->{itemsetSize}.txt\"... ";
	}
	open(FILE,">output_itemset_size_$self->{itemsetSize}.txt");
	print FILE "Rules\tSupport %\tConfidence %\n";
	foreach my$rule(sort{$a<=>$b}keys(%{$self->{associationRules}})){
		${$self->{associationRules}{$rule}{"R$rule"}{rule}}[1]=~s/\./,/;
		${$self->{associationRules}{$rule}{"R$rule"}{rule}}[2]=~s/\./,/;
		print FILE "R$rule\t${$self->{associationRules}{$rule}{\"R$rule\"}{rule}}[1]\t${$self->{associationRules}{$rule}{\"R$rule\"}{rule}}[2]\n";
	}
	print FILE "\n";
	foreach my$rule(sort{$a<=>$b}keys(%{$self->{associationRules}})){
		print FILE "Rule R$rule: ${$self->{associationRules}{$rule}{\"R$rule\"}{rule}}[0]\n";
		print FILE "Support: ${$self->{associationRules}{$rule}{\"R$rule\"}{rule}}[1] %\n";
		print FILE "Confidence: ${$self->{associationRules}{$rule}{\"R$rule\"}{rule}}[2] %\n";
		print FILE "Items:\n";
		foreach my$item(@{$self->{associationRules}{$rule}{"R$rule"}{items}}){
			print FILE "$item $self->{itemsKeyDescription}{$item}\n";
		}
		print FILE "\n";
	}
	print FILE "Frequent itemset: { @{$self->{frequentItemset}} }\n";
	print FILE "Items:\n";
	foreach my$item(@{$self->{frequentItemset}}){
		print FILE "$item $self->{itemsKeyDescription}{$item}\n";
	}
	close(FILE);
}

sub excel{
	require Excel::Writer::XLSX;
	if($self->{messages}){
		print "\nExporting to excel \"output_itemset_size_$self->{itemsetSize}.xlsx\"... ";
	}
	my $workbook  = Excel::Writer::XLSX->new("output_itemset_size_$self->{itemsetSize}.xlsx");
	my $worksheet = $workbook->add_worksheet();
	my $bold = $workbook->add_format(bold => 1);
	my $headings = ['Rules', 'Support %', 'Confidence %'];
	my(@rules,@support,@confidence);	
	foreach my$rule(sort{$a<=>$b}keys(%{$self->{associationRules}})){
		push @rules,"R$rule";
		push @support,${$self->{associationRules}{$rule}{"R$rule"}{rule}}[1];
		push @confidence,${$self->{associationRules}{$rule}{"R$rule"}{rule}}[2];
	}
	my$line=(scalar(@rules)+1);
	my@data=(\@rules,\@support,\@confidence);
	$worksheet->write('A1', $headings, $bold);
	$worksheet->write('A2', \@data);
	my$chart=$workbook->add_chart(type => 'column', embedded => 1);
	$chart->add_series(
		name       => 'Support %',
		categories => '=Sheet1!$A$2:$A$'.$line,
		values     => '=Sheet1!$B$2:$B$'.$line,
	);
	$chart->add_series(
		name       => 'Confidence %',
		categories => '=Sheet1!$A$2:$A$'.$line,
		values     => '=Sheet1!$C$2:$C$'.$line,
	);
	$worksheet->insert_chart('H2', $chart);
	$line+=2;
	foreach my$rule(sort{$a<=>$b}keys(%{$self->{associationRules}})){
		$worksheet->write("A$line","Rule R$rule: ${$self->{associationRules}{$rule}{\"R$rule\"}{rule}}[0]");
		$line++;
		$worksheet->write("A$line","Support: ${$self->{associationRules}{$rule}{\"R$rule\"}{rule}}[1] %");
		$line++;
		$worksheet->write("A$line","Confidence: ${$self->{associationRules}{$rule}{\"R$rule\"}{rule}}[2] %");
		$line++;
		$worksheet->write("A$line","Items:");
		$line++;
		foreach my$item(@{$self->{associationRules}{$rule}{"R$rule"}{items}}){
			$worksheet->write("A$line","$item $self->{itemsKeyDescription}{$item}");
			$line++;
		}
		$line++;
	}
	$worksheet->write("A$line","Frequent itemset: { @{$self->{frequentItemset}} }");
	$line++;
	$worksheet->write("A$line","Items:");
	$line++;
	foreach my$item(@{$self->{frequentItemset}}){
		$worksheet->write("A$line","$item $self->{itemsKeyDescription}{$item}");
		$line++;
	}
	$workbook->close;
}

return 1;
__END__
=head1 NAME

Data::Mining::Apriori - Perl extension for implement the apriori algorithm of data mining.

=head1 SYNOPSIS

	use strict;
	use warnings;
	use Data::Mining::Apriori;

	# TRANSACTION 103:CEREAL 101:MILK 102:BREAD
	#        1101          1        1         0
	#        1102          1        0         1
	#        1103          1        1         1
	#        1104          1        1         1
	#        1105          0        1         1
	#        1106          1        1         1
	#        1107          1        1         1
	#        1108          1        0         1
	#        1109          1        1         1
	#        1110          1        1         1

	my $apriori = new Data::Mining::Apriori;

	$apriori->{totalTransactions}=10; # The total number of transactions

	$apriori->{minSupport}=1.55; # The minimum support

	$apriori->{minConfidence}=1.55; # The minimum confidence

	$apriori->{output}=1; # The output type (1 - Export to file; 2 - Export to excel)(optional)

	$apriori->{messages}=1; # A value boolean to display the messages(optional)

	$apriori->{itemsKeyDescription}{'101'}='MILK'; # Hash table to add items by key and description
	$apriori->{itemsKeyDescription}{102}='BREAD';
	$apriori->{itemsKeyDescription}{'103'}='CEREAL';

	@{$apriori->{itemsKeyTransactions}{'101'}}=('1101',1103,'1104',1105,'1106',1107,'1109',1110);
	# Reference to array, to add the transactions of each item per key
	@{$apriori->{itemsKeyTransactions}{102}}=('1102',1103,'1104',1105,'1106',1107,1108,'1109',1110);
	@{$apriori->{itemsKeyTransactions}{'103'}}=('1101',1102,1103,'1104','1106',1107,1108,'1109',1110);

	$apriori->generate_rules; # Generate association rules to no longer meet the minimum support and confidence
	# or
	# $apriori->association_rules_itemset_size(3); # Generate rules from a set of items size 3, for example

	print "\n@{$apriori->{frequentItemset}}\n"; # Show frequent items

	# or from a database

	# CREATE TABLE dimension_product(
	# 	product_key INTEGER NOT NULL PRIMARY KEY,
	# 	product_alternate_key INTEGER NOT NULL,
	# 	product_name TEXT NOT NULL,
	# 	price REAL NOT NULL
	#  // ...
	# );
	#
	# INSERT INTO dimension_product VALUES(1,101,'MILK',10.00);
	# INSERT INTO dimension_product VALUES(2,102,'BREAD',10.00);
	# INSERT INTO dimension_product VALUES(3,103,'CEREAL',10.00);
	# 
	# // ...
	# 
	# CREATE TABLE fact_sales(
	# 	sales_order_number INTEGER NOT NULL,
	# 	sales_order_line_number INTEGER NOT NULL,
	# 	product_key INTEGER NOT NULL,
	# 	quantity INTEGER NOT NULL,
	#  // ...
	# 	PRIMARY KEY(sales_order_number, sales_order_line_number),
	# 	FOREIGN KEY(product_key) REFERENCES dimension_product(product_key)
	# );
	#
	# INSERT INTO fact_sales VALUES(1101,1,3,1);
	# INSERT INTO fact_sales VALUES(1101,2,1,1);
	# INSERT INTO fact_sales VALUES(1102,1,3,1);
	# INSERT INTO fact_sales VALUES(1102,2,2,1);
	# INSERT INTO fact_sales VALUES(1103,1,1,1);
	# INSERT INTO fact_sales VALUES(1103,2,2,1);
	# INSERT INTO fact_sales VALUES(1103,3,3,1);
	# INSERT INTO fact_sales VALUES(1104,1,1,1);
	# INSERT INTO fact_sales VALUES(1104,2,2,1);
	# INSERT INTO fact_sales VALUES(1104,3,3,1);
	# INSERT INTO fact_sales VALUES(1105,1,1,1);
	# INSERT INTO fact_sales VALUES(1105,2,2,1);
	# INSERT INTO fact_sales VALUES(1106,1,1,1);
	# INSERT INTO fact_sales VALUES(1106,2,2,1);
	# INSERT INTO fact_sales VALUES(1106,3,3,1);
	# INSERT INTO fact_sales VALUES(1107,1,1,1);
	# INSERT INTO fact_sales VALUES(1107,2,2,1);
	# INSERT INTO fact_sales VALUES(1107,3,3,1);
	# INSERT INTO fact_sales VALUES(1108,1,3,1);
	# INSERT INTO fact_sales VALUES(1108,2,2,1);
	# INSERT INTO fact_sales VALUES(1109,1,1,1);
	# INSERT INTO fact_sales VALUES(1109,2,2,1);
	# INSERT INTO fact_sales VALUES(1109,3,3,1);
	# INSERT INTO fact_sales VALUES(1110,1,1,1);
	# INSERT INTO fact_sales VALUES(1110,2,2,1);
	# INSERT INTO fact_sales VALUES(1110,3,3,1);

	use DBD::SQLite;
	use Data::Mining::Apriori;

	my $db = DBI->connect('dbi:SQLite:dbname=DW.db','','');

	my $sql = q~
	SELECT COUNT(DISTINCT(sales_order_number)) FROM fact_sales
	/* WHERE ... */
	~;

	my $query = $db->prepare($sql);
	$query->execute;
	my $totalTransactions = $query->fetchrow;

	$apriori = new Data::Mining::Apriori;

	$apriori->{totalTransactions}=$totalTransactions;

	$apriori->{minSupport}=1.55;

	$apriori->{minConfidence}=1.55;

	$apriori->{output}=1;

	$apriori->{messages}=1;

	my $support = int((($apriori->{totalTransactions}/100)*$apriori->{minSupport}));

	$sql = qq~
	SELECT dp.product_alternate_key, dp.product_name, COUNT(*)
	FROM dimension_product dp
	JOIN fact_sales fs ON
	dp.product_key = fs.product_key
	/* WHERE ... */
	GROUP BY dp.product_alternate_key, dp.product_name
	HAVING COUNT(*) >= $support
	~;

	$query = $db->prepare($sql);
	$query->execute;
	while(my($key,$description)=$query->fetchrow_array){
		$apriori->{itemsKeyDescription}{$key}=$description;
	}

	foreach my$key(keys(%{$apriori->{itemsKeyDescription}})){
		$sql = qq~
		SELECT DISTINCT(fs.sales_order_number)
		FROM dimension_product dp
		JOIN fact_sales fs ON
		dp.product_key = fs.product_key
		WHERE dp.product_alternate_key = $key
		/* AND ... */
		~;
		$query = $db->prepare($sql);
		$query->execute;
		while(my$transaction=$query->fetchrow){
			push @{$apriori->{itemsKeyTransactions}{$key}},$transaction;
		}
	}

	$apriori->generate_rules;
	# or
	# $apriori->association_rules_itemset_size(3);

	print "\n@{$apriori->{frequentItemset}}\n";

=head1 DESCRIPTION

This module implements the apriori algorithm of data mining.

=head1 ATTRIBUTES

=head2 totalTransactions

The total number of transactions.

=head2 minSupport

The minimum support.

=head2 minConfidence

The minimum confidence.

=head2 output

The output type (optional):

=over 4

=item 1

- Text file;

=item 2

- Excel file.

=back

=head2 messages

A value boolean to display the messages(optional).

=head2 itemsKeyDescription

Hash table to add items by key and description.

=head2 itemsKeyTransactions

Reference to array, to add the transactions of each item per key.

=head2 quantityPossibleRules

Quantity of possible rules.

=head2 frequentItemset

Frequent itemset.

=head2 recursively

A value boolean to generate association rules until no set of items meets the minimum support or minimum confidence.

=head1 METHODS

=head2 new

Creates a new instance of Data::Mining::Apriori.

=head2 generate_rules

Generate association rules until no set of items meets the minimum support or minimum confidence.

=head2 association_rules_itemset_size

Generates association rules by size set of items. Accepts the following argument:

=over 4

=item *

An integer.

Representing the size of the set of items.

=back

=head1 AUTHOR

Alex Graciano, E<lt>agraciano@cpan.orgE<gt>

=head1 COPYRIGHT AND LICENSE

Copyright (C) 2015-2016 by Alex Graciano

This library is free software; you can redistribute it and/or modify
it under the same terms as Perl itself, either Perl version 5.12.4 or,
at your option, any later version of Perl 5 you may have available.

=cut
