#A bunch of R functions to assist phytosociological tabulation

#Author: Tiago Monteiro-Henriques
#Licence: AGPL-3
#Version 0.0-1
#Date: 2016-04-03

#The following functions have been developed by Tiago Monteiro-Henriques aiming at assisting phytosociological tabulation

#SOME DEFINITIONS

#k-partition: Having N relevés to classify into k groups, a k-partition is any set of nonempty k subsets (groups) of the N relevés, such that every relevé is in exactly one of these subsets (groups).

#n-neighbour: A n-neighbour of a partition is a second partition in which n relevés are placed in a different group.

#optimization: Mathematical optimization aims at selecting the best value from a set of alternatives. In this work we use hill-climbing algorithm and stochastic hill-climbing algorithm to search for a partition of the relevés that maximizes an index. It is important to bear in mind, however, that it is not mathematically feasible to optimize two or more criteria at once expecting a single best value. I.e., for a nontrivial problem, finding a unique solution that simultaneously shows a maximum value for a certain first criteria and a maximum value for a certain second criteria it is simply not possible(!).


#FUNCTIONS


#########################
# FUNCTION HC.optim.tdv #
#########################

#DESCRIPTION

#HC.optim.tdv implements the hill-climbing algorithm, screening all 1-neighbours of a partition in each iteration. Optionally (random.first=TRUE) an additional phase of stochastic hill-climbing is performed firstly, where random n-neighbours are evaluated in each iteration

#USAGE

#HC.optim.tdv(m, k, maxit=10, p.ini="random", min.g.size=2, random.first=FALSE, rf.neigh.size=1, rf.maxit=500)

#ARGUMENTS

#m							a matrix. a phytosociological table of 0s (absences) and 1s (presences), where rows correspond to taxa and columns correspond to relevés
#k							integer. the number of desired groups
#maxit						integer. the number of iterations of the hill-climbing optimization
#p.ini						a vector of integers (taking values from 1 to k, with length equal to the number of columns of m, ascribing each relevé to one of the k groups), which will be used as the initial partition. By default, "random", generates a random initial partition.
#min.g.size					the minimum number of relevés that a group can contain (must be 2 or higher)
#random.first				logical. TRUE: performs hill-climbing on the 1-neighbourhood; FALSE: additionally performs stochastic hill-climbing on random n-neighbours (n is definded by the parameter rf.neigh.size) before performing the hill-climbing search on the 1-neighbourhood; see description above.
#rf.neigh.size				integer. the size (n) of the n-neighbour for the stochastic hill-climbing; only used if random.first=TRUE.
#rf.maxit					integer. the number of iterations of the hill-climbing optimization; only used if random.first=TRUE.

#DETAILS

#HC.optim.tdv accepts a phytosociological table (m, rows corresponding to taxa and columns corresponding to relevés) and searches for a k-partition (k, defined by the user) optimizing TotDiffVal1 index (see http://home.isa.utl.pt/~tmh/), i.e. searches, using a hill-climbing algorithm, for patterns of differential taxa by rearranging the relevés into k groups. Optimization can start from a random partition (p.ini="random"), or from a given partition (p.ini, defined by the user) e.g. produced by any clustering method, or even a manual classification. Each iteration searches for a TotDiffVal1 improvement screening all 1-neighbours, until the given number of maximum iterations (maxit) is reached. Optionally, a faster search (stochastic hill-climbing) can be performed firstly (defining random.first=TRUE), consisting on searching for TotDiffVal1 improvements, by randomly selecting n-neighbours (n defined by the user with the parameter rf.neigh.size), until a given number of maximum iterations (rf.maxit) is reached. Stochastic hill-climbing might be helpful for big tables (where the simple screening of all 1-neighbours might be too time consuming). It is also useful to avoid local maxima in which hill-climbing might be easily stuck. 

#Several runs of HC.optim.tdv (multi-starts) should be tried out, as several local maxima are certainly present and the hill-climbing algorithm converges easily to them. Sometimes, converging to a known high-valued partition is very unlikely, being dependent on data structure and on the initial partition. Trimming your table by a 'constancy' range (see "select.taxa" function) or using the result of other cluster methodologies as input, might help finding interesting partitions. Specially after trimming the table by a 'constancy' range, getting a random partition with TotDiffVal1 greater than zero might be very unlike; on such cases using the result of other clustering strategies as input is useful if not mandatory.

#VALUE

#A list with the following components:

#res.rf					a matrix with the iteration number (of the stochastic hill-climbing phase), the maximum TotDiffVal1 found until that iteration, and the higher TotDiffVal1 among all 1-neighbours; a first line of zeros is being added at the beginning of the matrix, which should be removed in future.
#par.rf					the best partition found in the stochastic hill-climbing phase
#max.TotDiffVal1.rf		maximum TotDifVal1 found in the stochastic hill-climbing phase
#res 					a matrix with the iteration number (of the hill-climbing), the maximum TotDiffVal1 found until that iteration, and the higher TotDiffVal1 among all 1-neighbours; a first line of zeros is being added at the beginning of the matrix, which should be removed in future.
#par					the best partition found in the hill-climbing phase
#local_maximum			logical indicating if par is a 1-neighbour local maximum
#time					the amount of time ("Sys.time") used by the function
#max.TotDiffVal1		maximum TotDifVal1 found

#EXAMPLES

#Still to come.


HC.optim.tdv <- function(m, k, maxit=10, p.ini="random", min.g.size=2, random.first=FALSE, rf.neigh.size=1, rf.maxit=500) {
	
	t <- Sys.time()
	if (!identical(c(0,1), sort(unique(as.vector(as.matrix(m)))))) {stop("Matrix m should contain only 0's and 1's")}
	if (min(rowSums(m))==0) {stop("At least one taxa is not present in any relevé")}
	if (min(colSums(m))==0) {stop("At least one relevé contains no taxa")}
	if ((mgs <- as.integer(min.g.size))<2) {stop("Object min.g.size must be greater than 2")}
	mt <- as.matrix(t(m))
	nr <- nrow(mt) # no. of relevés
	ns <- ncol(mt) #no. of taxa
	if (nr <= mgs*k) {stop(paste0("Random partition cannot guarantee at least ", mgs, " relevés per group!"))}
	if (p.ini[1] == "random") {p.ini <- sample(c(rep(1:k, mgs), sample(k,nr-mgs*k,replace=TRUE))); tp <- table(p.ini)} else {
		tp <- table(p.ini)
		if (!identical(as.integer(sort(unique(p.ini))),1:k)) {stop("Your partition doesn't have k groups!")} #maybe when p.ini is given k could be ignored
		if (!identical(length(p.ini), nr)) {stop("Object p.ini must be a partition of the columns of matrix m")}
		if (min(tp)<mgs) {stop(paste0("At least one group of the provided partition has less than ", mgs, " elements"))}
		if (max(tp)==mgs) {stop(paste0("At least one group of the provided partition has to have more than ", mgs, " elements"))}
	}	
	arf <- matrix(0, k, ns); colnames(arf) <- colnames(mt); rownames(arf) <- 1:k; adp <- arf #arf is the adjusted relative frequency in each group and adp is the adjusted differential proportion
	afg <- rowsum(mt, group=p.ini) #no. of relevés containing the taxon, within each group (absolute frequency in each group)
	t1 <- (afg==0)*as.vector(tp) #no. of relevés of each group, when the taxon is not present
	t2 <- colSums(afg>0) #no. of groups containing the taxon
	t3 <- t2>1 #indices of the taxa occurring in more than one group (they must occur in at least one)
	for (i in 1:k) { #fills matrices adp and arf, only when the taxon is present in the group!
		rel.i <- which(p.ini==i) #indices of the relevés of the group
		spe.i <- afg[i,]>0 #indices of the taxa present in the group
		adp[i,spe.i & !t3] <- 1 #adp is 1 for the taxa occurring in one group only!
		if (sum(t3)>0) { #for taxa occurring in more than one group
			t4 <- spe.i & t3
			adp[i, t4] <- colSums(t1[-i, t4, drop=FALSE]/sum(tp[-i]))/(t2[t4])
		}
		arf[i, spe.i] <- afg[i,spe.i]/as.vector(tp[i])/(t2[spe.i])
	}
	cuscor <- sum(colSums(arf*adp))/ns
	
	res.rf=NULL
	par.rf=NULL
	cuscor.rf=NULL
	if (random.first==TRUE) {

		#random.neighbour: this function returns randomly one of the (k-1)*nr 1-neighbour partitions, assuring that the minimum group size (mgs) is respected
		random.neighbour <- function (p) {
			tp <- table(p)
			k.int <- which(tp>mgs) #k of interest to sample
			k.sam <- tp[k.int]-mgs #k samplable
			troca <- sample(rep(k.int,k.sam), min(rf.neigh.size, sum(k.sam))) #neighbourhood.size cannot be greater than sum(k.sam)
			niter <- table(troca)
			gn.tot <- NULL
			in.tot <- NULL
			for (gv in sort(unique(troca))) {#it assumes that table function also sorts!
      			if(length(c(1:k)[-gv])!=1) {gn.tot <- c(gn.tot,sample(c(1:k)[-gv],niter[paste(gv)],replace=TRUE))} else {gn.tot <- c(gn.tot,rep(c(1:k)[-gv],niter[paste(gv)]))}
      			in.tot <- c(in.tot,sample(which(p==gv),niter[paste(gv)]))
      		}
    		p[in.tot] <- gn.tot
    		return(p)
		}
	
		#STOCHASTIC HILL CLIMBING (random.first=TRUE)
		parcor <- p.ini
  		res.rf <- c(0,0,0)
  		for (iter in 1:rf.maxit) {
   			parviz <- random.neighbour(parcor)
   			tp <- table(parviz)
			arf <- matrix(0, k, ns); colnames(arf) <- colnames(mt); rownames(arf) <- 1:k; adp <- arf #arf is the adjusted relative frequency in each group and adp is the adjusted differential proportion
			afg <- rowsum(mt, group=parviz) #no. of relevés containing the taxon, within each group (absolute frequency in each group)
			t1 <- (afg==0)*as.vector(tp) #no. of relevés of each group, when the taxon is not present
			t2 <- colSums(afg>0) #no. of groups containing the taxon
			t3 <- t2>1 #indices of the taxa occurring in more than one group (they must occur in at least one)
			for (i in 1:k) { #fills matrices adp and arf, only when the taxon is present in the group!
				rel.i <- which(parviz==i) #indices of the relevés of the group
				spe.i <- afg[i,]>0 #indices of the taxa present in the group
				adp[i,spe.i & !t3] <- 1 #adp is 1 for the taxa occurring in one group only!
				if (sum(t3)>0) { #for taxa occurring in more than one group
					t4 <- spe.i & t3
					adp[i, t4] <- colSums(t1[-i, t4, drop=FALSE]/sum(tp[-i]))/(t2[t4])
				}
				arf[i, spe.i] <- afg[i,spe.i]/as.vector(tp[i])/(t2[spe.i])
			}
			cusviz <- sum(colSums(arf*adp))/ns
			if (cusviz >= cuscor) {parcor <- parviz; cuscor <- cusviz}
			res.rf <- rbind(res.rf,c(iter,cuscor,cusviz))	
		}
		p.ini <- par.rf <- parcor
		cuscor.rf <- cuscor
		tp <- table(p.ini)
		#afg, t1, t2, t3, arf and adp must be recalculated for parcor (as a worse neighbour might have been used to calculate them before)!
		arf <- matrix(0, k, ns); colnames(arf) <- colnames(mt); rownames(arf) <- 1:k; adp <- arf #arf is the adjusted relative frequency in each group and adp is the adjusted differential proportion
		afg <- rowsum(mt, group=p.ini) #no. of relevés containing the taxon, within each group (absolute frequency in each group)
		t1 <- (afg==0)*as.vector(tp) #no. of relevés of each group, when the taxon is not present
		t2 <- colSums(afg>0) #no. of groups containing the taxon
		t3 <- t2>1 #indices of the taxa occurring in more than one group (they must occur in at least one)
		for (i in 1:k) { #fills matrices adp and arf, only when the taxon is present in the group!
			rel.i <- which(p.ini==i) #indices of the relevés of the group
			spe.i <- afg[i,]>0 #indices of the taxa present in the group
			adp[i,spe.i & !t3] <- 1 #adp is 1 for the taxa occurring in one group only!
			if (sum(t3)>0) { #for taxa occurring in more than one group
				t4 <- spe.i & t3
				adp[i, t4] <- colSums(t1[-i, t4, drop=FALSE]/sum(tp[-i]))/(t2[t4])
			}
		arf[i, spe.i] <- afg[i,spe.i]/as.vector(tp[i])/(t2[spe.i])
		}
    }
    		
	#max.tdv.neighbour: this function returns the(one of the) 1-neighbouring partition(s) presenting greater TotDiffVal1 (tdv)
	max.tdv.neighbour <- function (p) {
		tp <- table(p); mtemp <- matrix(p, nr, nr, byrow=TRUE); fordiag <- p; res1 <- NULL; res2 <- NULL
		if (min(tp)>mgs) { #all groups have more than mgs elements
			for (i in 1:(k-1)) {
				fordiag <- fordiag+1
				fordiag[which(fordiag==k+1)] <- 1
				diag(mtemp) <- fordiag
				res1 <- rbind(res1,mtemp)
				res2 <- c(res2,fordiag)
			}
			res2 <- cbind(rep(p, k-1),res2)
			colnames(res2) <- NULL	
		} else { #at least one group has only mgs elements
			ind.rm <- as.numeric(sapply(which(tp==mgs), function (x) {which(p==x)})) #it returns the indices of the partition corresponding to groups presenting only mgs elements #as.numeric is probably not necessary, it is possibly done by default in []
			for (i in 1:(k-1)) {
				fordiag <- fordiag+1
				fordiag[which(fordiag==k+1)] <- 1
				diag(mtemp) <- fordiag
				res1 <- rbind(res1,mtemp[-ind.rm,])
				res2 <- c(res2,fordiag[-ind.rm])
			}
			res2 <- cbind(rep(p[-ind.rm], k-1),res2)
			colnames(res2) <- NULL		
		}
		mat.neig <- list(p.list=res1,pairs=res2)
	
		mat.neig.tdv <- sapply(1:nrow(mat.neig$p.list), function (x) { #as it is difficult to parallelize!
			pn <- mat.neig$p.list[x,]
			kc <- mat.neig$pairs[x,]
			tpn <- table(pn) #neighbour partition
			for (i in kc) {afg[i,] <- colSums(mt[which(pn==i),])} #(updates afg) no. of relevés containing the taxa, within each group, only for the two groups that changed a relevé!
			t5 <- afg[kc,][1,]>0 | afg[kc,][2,]>0 #taxa affected by the change of relevés	
			t1[kc,] <- (afg==0)[kc,]*as.vector(tpn)[kc] #(updates t1) no. of relevés of each group, when the taxon is not present #t5 must not be used here!
			t2[t5] <- colSums(afg>0)[t5] #(new t2) no. of groups containing the taxon
			t3[t5] <- (t2>1)[t5] #indices of the taxa occurring in more than one group (they must occur in at least one)
			for (i in 1:k) { #changes matrices adp and arf, only when the taxon is present in the group!
				rel.i <- which(pn==i) #indices of the relevés of the group
				spe.i <- afg[i,]>0 #indices of the taxa present in the group
				if (sum(spe.i & t5)>0) { #caso o grupo em causa contenha espécies afetadas
					adp[i, spe.i & !t3] <- 1 #adp is 1 for the taxa occurring in one group only!
					if (sum(t3)>0) { #for taxa occurring in more than one group
						t4 <- spe.i & t3
						adp[i, t4] <- colSums(t1[-i, t4, drop=FALSE]/sum(tpn[-i]))/(t2[t4])
					}
					adp[i, t5 & !spe.i] <- 0 #inserts a zero to the affected taxa that is no more present in the group!
					arf[i, t5] <- afg[i, t5]/as.vector(tpn[i])/(t2[t5])
				}			
			}
			return(sum(colSums(arf*adp))/ns)
		})
		return(list(tdv=tdv <- max(mat.neig.tdv), p=mat.neig$p.list[which(mat.neig.tdv==tdv)[1],]))
	}

	#HILL CLIMBING
	if (maxit==0) {res <- NULL; parcor=p.ini; loc_max=NULL} else {
  		loc_max <- FALSE
  		parcor <- p.ini
  		#cuscor is already calculated
  		res <- c(0,0,0)
  		for (iter in 1:maxit) {
    		temp <- max.tdv.neighbour(parcor)
    		parviz <- temp$p
    		cusviz <- temp$tdv
    		if (cusviz > cuscor) {parcor <- parviz; cuscor <- cusviz} else {loc_max <- TRUE; print("Local maximum reached"); break}
    		res <- rbind(res,c(iter,cuscor,cusviz))
  		}		
  	}
  	res.t <- Sys.time() - t
  	return(list(res.rf=res.rf, par.rf=par.rf, max.TotDiffVal1.rf=cuscor.rf,res=res, par=parcor, local_maximum=loc_max, time=res.t, max.TotDiffVal1=cuscor))
}


################
# FUNCTION tdv #
################

#DESCRIPTION

#tdv calculates TotDiffVal1 index (see http://home.isa.utl.pt/~tmh/ for the calculation formula of TotDiffVal1).

#USAGE

#tdv(m, k, p, full.output=FALSE)

#ARGUMENTS

#m							a matrix. a phytosociological table of 0s (absences) and 1s (presences), where rows correspond to taxa and columns correspond to relevés
#p							a vector of integers (a k-partition, taking values from 1 to k, with length equal to the number of columns of m, ascribing each relevé to one of the k groups).
#full.output				logical. it determines the amount of information returned by the function (see VALUE).

#DETAILS

#tdv accepts a phytosociological table (m, rows corresponding to taxa and columns corresponding to relevés) and a k-partition (p), returning the respective TotDiffVal1 index.

#VALUE

#If full.output is FALSE, a list with the following components:

#arf			the adjusted relative frequency (a/b/e) of each taxa in each group (see http://home.isa.utl.pt/~tmh/ for the calculation formula)
#adp			the adjusted differential proportion (c/d/e) of each taxa in each group (see http://home.isa.utl.pt/~tmh/ for the calculation formula)
#DiffVal1		the DiffVal1 of each taxa (see http://home.isa.utl.pt/~tmh/ for the calculation formula)
#TotDiffVal1 	the TotDiffVal1 of matrix m given the partition p (see http://home.isa.utl.pt/~tmh/ for the calculation formula)

#If full.output is TRUE, these extra components are added: afg, t1, t2, t3. These are intermediate matrices used in the computation of TotDiffVal1 (see function code for details).

#EXAMPLES

#Still to come.


tdv <- function (m, p, full.output=FALSE) {
	if (!identical(length(p), ncol(m))) {stop("Object p must be a partition of the columns of m")}
	k <- max(p)
	if (!identical(c(0,1), sort(unique(as.vector(as.matrix(m)))))) {stop("Matrix m should contain only 0's and 1's")}
	if (min(rowSums(m))==0) {stop("At least one taxa is not present in any relevé")}
	if (min(colSums(m))==0) {stop("At least one relevé contains no taxa")}
	mt <- t(m)
	ns <- ncol(mt) #no. of taxa
	tp <- table(p)
	arf <- matrix(0, k, ns); colnames(arf) <- colnames(mt); rownames(arf) <- 1:k; adp <- arf
	afg <- rowsum(mt, group=p) #no. of relevés containing the taxon, within each group (absolute frequency in each group)
	t1 <- (afg==0)*as.vector(tp) #no. of relevés of each group, when the taxon is not present
	t2 <- colSums(afg>0) #no. of groups containing the taxon
	t3 <- t2>1 #indices of the taxa occurring in more than one group (they must occur in at least one)

	for (i in 1:k) { #fills matrices adp and arf, only when the taxon is present in the group!
		rel.i <- which(p==i) #indices of the relevés of the group
		spe.i <- afg[i,]>0 #indices of the taxa present in the group
		adp[i,spe.i & !t3] <- 1 #adp is 1 for the taxa occurring in one group only!
		if (sum(t3)>0) { #for taxa occurring in more than one group
			t4 <- spe.i & t3
			adp[i, t4] <- colSums(t1[-i, t4, drop=FALSE]/sum(tp[-i]))/(t2[t4])
		}
		arf[i, spe.i] <- afg[i,spe.i]/as.vector(tp[i])/(t2[spe.i])
	} 
if (full.output==FALSE) {return(list(arf=t(arf),adp=t(adp),DiffVal1=matrix(colSums(arf*adp), ns, 1, dimnames=list(colnames(arf),c("diff.val1"))),TotDiffVal1=sum(colSums(arf*adp))/ns))} else {
	return(list(afg=afg,t1=t1,t2=t2,t3=t3,arf=arf,adp=adp,DiffVal1=matrix(colSums(arf*adp),ns,1),TotDiffVal1=sum(colSums(arf*adp))/ns))}
}


#####################
# FUNCTION tabulate #
#####################

#DESCRIPTION

#tabulate orders a phytosociological table based on, firstly, the number of groups a taxon occurs in, and secondly, on the within-group (adjusted) relative frequency

#USAGE

#tabulate(m, p, taxa.names, plot.im=NULL)

#ARGUMENTS

#m							a matrix. a phytosociological table of 0s (absences) and 1s (presences), where rows correspond to taxa and columns correspond to relevés
#p							a vector of integers (a k-partition, taking values from 1 to k, with length equal to the number of columns of m, ascribing each relevé to one of the k groups).
#taxa.names					a character vector (with length equal to the number of rows of m) with taxa names.
#plot.im					by default, NULL, returns without plotting. "normal" plots an image of the tabulated matrix. "condensed" plots an image of the tabulated matrix but presenting sets of differential taxa as solid coloured blocks.

#DETAILS

#tabulate accepts a phytosociological table (m, rows corresponding to taxa and columns corresponding to relevés), a k-partition (p) and taxa names, returning an ordered table based on, firstly, the number of groups a taxon occurs in, and secondly, on the within-group (adjusted) relative frequency; optionally an image of the ordered table can be produced: plot.im="normal" plots an image of the tabulated matrix, each group corresponding to a different colour; plot.im="condensed" plots an image of the tabulated matrix (each group corresponding to a different colour) but presenting sets of differential taxa as solid coloured blocks.

#VALUE

#If plot.im is NULL, a list with the following components:

#taxa.names			a matrix with the iteration number (of the stochastic hill-climbing phase), the maximum TotDiffVal1 found until that iteration, and the higher TotDiffVal1 among all 1-neighbours; a first line of zeros is being added at the beginning of the matrix, which should be removed in future.
#taxa.ord				
#tabulated			the orderer m matrix. 	
#condensed			the matrix used to create the "condensed" image. 			

#If plot.im is "normal", it returns the above list and, additionally, plots an image of the tabulated matrix.

#If plot.im is "condensed", it returns the above list and, additionally, plots an image of the tabulated matrix but presenting sets of differential taxa as solid coloured blocks:

#EXAMPLES

#Still to come.


tabulate <- function (m, p, taxa.names, plot.im=NULL) {
	if (!identical(c(0,1), sort(unique(as.vector(as.matrix(m)))))) {stop("Matrix m should contain only 0's and 1's")}
	if (min(rowSums(m))==0) {stop("At least one taxa is not present in any relevé")}
	if (min(colSums(m))==0) {stop("At least one relevé contains no taxa")}
	mgs <- 2 #tem que ter no mínimo 2 grupos!
	k <- max(p)
	nr <- ncol(m) # no. of relevés
	ns <- nrow(m) #no. of taxa
	tp <- table(p)
	if (length(taxa.names)!=ns) {stop("The length of taxa.names must match the number of rows of m")}
	if (!identical(length(p), nr)) {stop("Object p must be a partition of the columns of m")}
	if (min(tp)<mgs) {stop("At least one group of the provided partition has less than 2 elements")}
	res <- tdv(m, p, full.output=TRUE)
	ot <- (res$afg>0)*(1:k)
	order.t1 <- res$t2 #no. of groups containing the taxon
	order.t2 <- apply(ot,2,function(x){prod(x[x>0])}) #product of group numbers when the taxon is present
	order.t3 <- 1-(colSums(res$arf)/res$t2) #1 - the sum of all the adjusted relative frequencies for the taxon
	sort.rel <- order(p)
	taxa.ord <- order(order.t1,order.t2,order.t3)
	mat1 <- rbind(sort(p),0, m[taxa.ord,sort.rel])
	colnames(mat1) <- sort.rel
	ht <- colnames(m)[sort.rel]
	rownames(mat1) <- c("group","space",as.character(taxa.names)[taxa.ord])
	mat2 <- t(res$adp*res$arf)[taxa.ord,]
	rownames(mat2) <- c(as.character(taxa.names)[taxa.ord])
	if (is.null(plot.im)) {} else if (plot.im=="normal") {
		mat1.im <- mat1
		mat1.im[3:(ns+2),] <- mat1.im[3:(ns+2),]*matrix(sort(p)+1,ns,nr,byrow=TRUE)
		mat1.im[mat1.im==0] <- 1
		mat1.im[1,] <- mat1.im[1,]+1
		mat1.im[2,] <- 0
		image(t(mat1.im[(ns+2):1,]),col = c("black","white",rainbow(k)), xaxt="n", yaxt="n")	
		} else if (plot.im=="condensed") {
			mat2.im <- mat2>0
			mat2.im <- rbind((1:k)+1, 0, mat2.im)
			mat2.im[3:(ns+2),] <- mat2.im[3:(ns+2),]*matrix((1:k)+1,ns,k,byrow=TRUE)
			mat2.im[mat2.im==0] <- 1
			mat2.im[2,] <- 0
			image(t(mat2.im[(ns+2):1,]),col = c("black","white",rainbow(k)), xaxt="n", yaxt="n")
		}	
	return(list('taxa.names'=taxa.names, 'taxa.ord'=taxa.ord, header=ht, tabulated=mat1[-2,], condensed=mat2))
}


###############################
# FUNCTION explore.tabulation #
###############################

#DESCRIPTION

#explore.tabulation plots an interactive image of a tabulation

#USAGE

#explore.tabulation(x)

#ARGUMENTS

#x							a list. as returned by the "tabulate" function

#DETAILS

#explore.tabulation accepts an object returned by the "tabulate" function, plotting a condensed image of the respective tabulated matrix, permitting the user to click on the coloured blocks and receive the respective list of taxa names on the console.

#VALUE

#returns invisibly, although it plots taxa names on the console upon the user click

#EXAMPLES

#Still to come.


explore.tabulation <- function (x) {	
	mat2 <- x$condensed; ns <- nrow(mat2)
	k <- ncol(mat2)
	taxa.names <- x$taxa.names
	taxa.ord <- x$taxa.ord
	mat2.im <- mat2>0
	mat2.im <- rbind((1:k)+1, 0, mat2.im)
	mat2.im[3:(ns+2),] <- mat2.im[3:(ns+2),]*matrix((1:k)+1,ns,k,byrow=TRUE)
	mat2.im[mat2.im==0] <- 1
	mat2.im[2,] <- 0
	image(t(mat2.im[(ns+2):1,]),col = c("black","white",rainbow(k)), xaxt="n", yaxt="n")
	id.x <- rep(seq(0,1,length.out=k))
	id.y <- rev(rep(seq(0,1,length.out=ns+2)[1:ns], times=k))
	list.cent.label <- apply((mat2>0)+0, 2, function (x) {
		ic <- 1; i.from <- NULL; i.to <- NULL; sen <- "from"
		x <- c(x,0)
		while (ic <= length(x)) { #maybe replace with strgsplit + grep!
			if (sen=="from") {
				if (x[ic]==0) {ic <- ic+1} else {i.from <- c(i.from,ic); sen <- "to"; ic <- ic+1}
			} else {if (x[ic]==1) {ic <- ic+1} else {i.to <- c(i.to,ic-1); sen <- "from"; ic <- ic+1}}
		}
		ind <- apply(cbind(i.from,i.to),1, function (z) seq(z[1],z[2]))
		if (is.matrix(ind)) {
			apply(ind, 2, function (y) {
				res.y <- mean(id.y[y])
				res.label <- paste(paste(taxa.names[taxa.ord][y], collapse="\n"), "\n\n")
				return(list(res.y=res.y, res.label=res.label))
			})				
		} else {
			lapply(ind, function (y) {
				res.y <- mean(id.y[y])
				res.label <- paste(paste(taxa.names[taxa.ord][y], collapse="\n"), "\n\n")
				return(list(res.y=res.y, res.label=res.label))	
			})
		}
	})
	id.y.cent.labt <- lapply(list.cent.label, function (x) {as.matrix(simplify2array(x,higher = FALSE)[1,])})
	id.y.cent.lab <- unlist(id.y.cent.labt)
	id.lab <- c("",unlist(sapply(list.cent.label, function (x) {as.matrix(simplify2array(x,higher = FALSE)[2,])})))
	id.x.cent.lab <- rep.int(id.x, times=sapply(id.y.cent.labt, length))
	#; print(id.y.cent.label); print(id.label)
	points(id.x.cent.lab , id.y.cent.lab, cex=0.5, pch=20)
	y.exit <- 0.95 #it can be improved
	points(1,y.exit,cex=3); points(1,y.exit,cex=2, col="red"); points(1,y.exit,cex=1, col="white")
	cat("Click on the plot black dots to retrieve taxon(taxa) names.\nClick on the top-right circle to exit.\n\n")
	res.click <- "start"
	while (res.click!="") {res.click <- id.lab[(identify(x=c(1,id.x.cent.lab), y=c(y.exit,id.y.cent.lab), labels=id.lab, cex=0.5, plot=FALSE, n=1))]; cat(res.click)}
	invisible()
}


########################
# FUNCTION identical.p #
########################

#DESCRIPTION

#identical.p checks if two k-partitions are identical

#USAGE

#identical.p(p1,p2)

#ARGUMENTS

#p1, p2						two vectors of integers (i.e. two k-partition, taking values from 1 to k) of the same length.

#DETAILS

#identical.p checks if two k-partitions are identical, i.e. if their groupings are the same (useful in the case that the partitions have divergent group numberings).

#VALUE

#TRUE if the k-partitions are identical; FALSE otherwise.

#EXAMPLES

#Still to come.


identical.p <- function(p1,p2) {
	if (!identical(length(p1), length(p2))) {stop("Partitions to compare must have the same length")}
	if (!identical(as.numeric(sort(unique(p1))),as.numeric(sort(unique(p2))))) {stop("Partitions to compare must use the same group names (or numbers) and must have the same total number of groups")}
	if (nrow(unique(cbind(p1,p2)))==length(unique(p1))) {res <- TRUE} else {res <- FALSE}
	return(res)
}


########################
# FUNCTION select.taxa #
########################

#DESCRIPTION

#select.taxa trims a phytosociological table by a 'constancy' range

#USAGE

#select.taxa(m, const=c(0,100), min.pres=NULL)

#ARGUMENTS

#m							a matrix. a phytosociological table of 0s (absences) and 1s (presences), where rows correspond to taxa and columns correspond to relevés
#const						a vector with two ordered numeric values, which will be used as minimum and maximum 'constancy' values to trim table m
#min.pres					integer. optionally, a minimum number presences of each taxa (in relevés). If provided, it overrides the 'constancy' range.

#DETAILS

#select.taxa accepts a phytosociological table (m), and minimum and maximum 'constancy' values (const). It returns a trimmed phytosociological table, removing taxa outside the given 'constancy' range and removing relevés that become empty. Optionally, a minimum number of presences (in relevés) might be used (min.pres) instead of the 'constancy' range.

#VALUE

#A list with the following components:

#remo.rel		the indices of the removed relevé(s)
#remo.sp		the indices of the removed taxon(taxa)
#m.select		the trimmed table

#EXAMPLES

#Still to come.


select.taxa <- function(m, const=c(0,100), min.pres=NULL) {
	if (!identical(c(0,1), sort(unique(as.vector(as.matrix(m)))))) {stop("Matrix m should contain only 0's and 1's")}
	if (min(rowSums(m))==0) {stop("At least one taxa is not present in any relevé")}
	if (min(colSums(m))==0) {stop("At least one relevé contains no taxa")}	
	mpa <- (m>0)+0
	m.col <- ncol(m)
	m.row <- nrow(m)
	if (is.null(min.pres)) { #selects based on constancy values
		inf <- const[1]/100
		sup <- const[2]/100
		sp.ind <- rowSums(mpa)>round(m.col*inf) & rowSums(mpa)<round(m.col*sup)
		m.select <- m[sp.ind,]
		rel.ind <- colSums(m.select)>0
		m.select <- m.select[, rel.ind]
		cat(paste0(m.row-sum(sp.ind), " taxon(taxa) removed and ", m.col-sum(rel.ind), " empty relevé(s) removed\n"))
		return(list(remo.rel=which(!rel.ind), remo.sp=which(!sp.ind), m.select=m.select))
	} else { #selects using number of presences in relevés
		sp.ind <- rowSums(mpa)>= min.pres
		m.select <- m[sp.ind,]
		rel.ind <- colSums(m.select)>0
		m.select <- m.select[, rel.ind]
		cat(paste0(m.row-sum(sp.ind), " taxon(taxa) removed and ", m.col-sum(rel.ind), " empty relevé(s) removed\n"))
		return(list(remo.rel=which(!rel.ind), remo.sp=which(!sp.ind), m.select=m.select))
	}		
}