Source code for jLM.CMEPostProcessing

#
# University of Illinois Open Source License
# Copyright 2008-2018 Luthey-Schulten Group,
# All rights reserved.
# 
# Developed by: Luthey-Schulten Group
#                           University of Illinois at Urbana-Champaign
#                           http://www.scs.uiuc.edu/~schulten
# 
# Permission is hereby granted, free of charge, to any person obtaining a copy of
# this software and associated documentation files (the Software), to deal with 
# the Software without restriction, including without limitation the rights to 
# use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 
# of the Software, and to permit persons to whom the Software is furnished to 
# do so, subject to the following conditions:
# 
# - Redistributions of source code must retain the above copyright notice, 
# this list of conditions and the following disclaimers.
# 
# - Redistributions in binary form must reproduce the above copyright notice, 
# this list of conditions and the following disclaimers in the documentation 
# and/or other materials provided with the distribution.
# 
# - Neither the names of the Luthey-Schulten Group, University of Illinois at
# Urbana-Champaign, nor the names of its contributors may be used to endorse or
# promote products derived from this Software without specific prior written
# permission.
# 
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL 
# THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR 
# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 
# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 
# OTHER DEALINGS WITH THE SOFTWARE.
# 
# Author(s): Michael J. Hallock and Joseph R. Peterson
#

"""Post-processing for the Well-stirred CME model"""
try:
	import lm
except ImportError:
	lm = None
import h5py

import numpy as np
import matplotlib.pyplot as plt

from matplotlib import cm
from .LMLogger import *

try:
    from igraph import *
except ModuleNotFoundError:
    import sys

    if 'sphinx' not in sys.modules:
    	raise

# #######################
# HDF5 Handle Functions #
# #######################

[docs]
def openLMFile(filename):
	"""Open a Lattice Microbes File for reading

    Args:
        filename:
            Name of the file
    Returns:
        a handle to the file
    """
	f = h5py.File(filename, 'r')
	return f



[docs]
def closeLMFile(f):
	"""Close a Lattice Microbes File

    Args:
        f:
            A previously opened lattice microbes file
    """
	f.close()



# #################
# Trace Functions #
# #################

[docs]
def showTraceFromFile(filename, species, replicate, **kwargs):
	"""Show species trace from a particular replicate

    Args:
        filename:
            The patch to an HDF5 output file generated by LatticeMicrobes
        species:
            A list of species to plot
        replicate:
            The replicate to show trace for
        kwargs:
            Additional arguments to be passed on to matplotlib.plot.  These are any arguments that are valid with matplotlib.plot
    """
	# Open the HDF5 output data file
	f=openLMFile(filename)
	fig = showTrace(f, species=species, replicate=replicate, **kwargs)
	plt.show()
	closeLMFile(f)



[docs]
def plotTraceFromFile(filename, species, replicate, outfile, **kwargs):
	"""Plot species from an output file

    Args:
        filename:
            The patch to an HDF5 output file generated by LatticeMicrobes
        species:
            A list of species to plot
        replicate:
            The replicate to show trace for
        kwargs:
            Additional arguments to be passed on to matplotlib.plot.  These are any arguments that are valid with matplotlib.plot
        outfile:
            A filename to plot to
    """
	# Open the HDF5 output data file
	fig = plotTrace(openLMFile(filename), species=species, replicate=replicate, filename=outfile, **kwargs)
	plt.clf()



[docs]
def plotCMEReactionNetwork(sim, filename, withRxnNodes=False, collapseReversible=False):
	"""Plot the reaction scheme as a network 

    Args:
        sim:
            An CMESimulation object
        filename:
            A file to which to output
        withRxnNodes:
            Plot the graph with reactions as nodes (default: false)
        collapseReversible:
            Collapsee reversible reactions into one node (default: false)
    """
	# Get handles to CME internals
	rs=sim.reactions
	nr=0
	ss=sim.species_id
	ns=len(ss)
	sc=sim.initial_counts

	# Create Graph
	g = Graph(directed=True)
	totNodes=ns
	if withRxnNodes:
		nr=len(rs)
		totNodes += nr
	g.add_vertices(totNodes)

	# Add reactions and species to graph
	count=0
	for v in g.vs:
		if count < nr:
			g.vs[v.index]["type"]="reaction"
			g.vs[v.index]["label"]=str(rs[count][0])+str("->")+str(rs[count][1])
			g.vs[v.index]["rate"]=rs[count][2]
		else:
			g.vs[v.index]["type"]="specie"
			g.vs[v.index]["label"]=ss[count-nr]
			g.vs[v.index]["icount"]=sc[ss[count-nr]]
			
		count+=1
	
	# Add edge dependencies
	count=0
	if withRxnNodes:
		for rxn in rs:
			# Add reactants
			if isinstance(rxn[0], tuple):
				rctnum=ss.index(r)
				g.add_edges([(rctnum+nr,count)])
			else:
				if rxn[0] != '': # FIXME
					rctnum=ss.index(rxn[0])
					g.add_edges([(rctnum+nr,count)])
			# Add products
			if isinstance(rxn[1], tuple):
				for p in rxn[1]:
					pdtnum=ss.index(p)
					g.add_edges([(count, pdtnum+nr)])
			else:
				if rxn[1] != '': # FIXME
					pdtnum=ss.index(rxn[1])
					g.add_edges([(count, pdtnum+nr)])

			count+=1	
	else:
		for rxn in rs:
			if isinstance(rxn[0],tuple):
				for r in rxn[0]:
					rctnum=ss.index(r)
					if isinstance(rxn[1],tuple):
						for p in rxn[1]:
							prdnum=ss.index(p)
							g.add_edges([(rctnum,prdnum)])
					else:
						if rxn[1] != '': # FIXME
							prdnum=ss.index(rxn[1])
							g.add_edges([(rctnum,prdnum)])
			else:
				rctnum=ss.index(rxn[0])
				if isinstance(rxn[1],tuple):
					for p in rxn[1]:
						prdnum=ss.index(p)
						g.add_edges([(rctnum,prdnum)])
				else:
					if rxn[1] != '': # FIXME
						prdnum=ss.index(rxn[1])
						g.add_edges([(rctnum,prdnum)])
				

	# Save the graph...
	g.save(filename,format="gml")



[docs]
def showTrace(f, species, replicate, **kwargs):
	"""Show a specific species trace

    Args:
        f:
            An h5py object handle
        species:
            A list of species to show
        replicate:
            The replicate to show trace for
        kwargs:
            Additional arguments to be passed on to matplotlib.plot.  These are any arguments that are valid with matplotlib.plot
    """
	fig = plotTrace(f,species=species,replicate=replicate, filename=None, **kwargs)
	plt.show()



[docs]
def plotTrace(f, species = None, replicate = 1, filename = None, **kwargs):
	"""Plot a specific species trace

    Args:
        f:
            An h5py object handle
        species:
            A specie name or a list of species to show; can be a single string or a iterable list of species
        filename:
            A filename to print to (default None, gives the same behavior as showAvgVar(...))
        replicate:
            The replicate to show trace for. Can be an integer or an iterable list of replicte numbers
        kwargs:
            Additional arguments to be passed on to matplotlib.plot.
			These are any arguments that are valid with matplotlib.plot

    Returns:
        A handle to the figure object created which allows customization of plot attributes
    """

	# Get a handle to the Simulations group, where results are stored
	sim = f['Simulations']

	# Count the number of replicates that were run
	replicates = len(sim)

	if (replicates == 0):
		LMLogger.warning("No simulations have been performed.")
		raise Exception("Cannot plot trace, no simulations in LM file.")
	
	if isinstance(replicate, int):
		if int(replicate) > replicates or int(replicate) < 0:
			LMLogger.warning("No replicate number %d found in file %s.", int(replicate), f)
			raise Exception("Cannot plot trace, replicate not found:", int(replicate))
	elif hasattr(replicate, '__iter__'):
		for i in replicates:
			if int(replicate) > replicates or int(replicate) < 0:
				LMLogger.warning("No replicate number %d found in file %s.", int(replicate), f)
				raise Exception("Cannot plot trace, replicate not found:", int(replicate))
	else:
		raise Exception("Incompatible type for replicate argument")

	# Get the list of timesteps from the first replicate
	# SpeciesCountTimes is a 1D vector
	times = getTimesteps(f)

	# Initialize the array to store the mean count
	traces = np.zeros((len(times), len(species)))

	par = f['Parameters']
	
	if 'SpeciesNames' in par:
		namesDS = par['SpeciesNames'][:]
		names = namesDS.tolist()
		names = [spec[0] for spec in names]
	else:
		# Compatible with LM trajectory generated < 2.5
		names = par.attrs['speciesNames'].decode().split(',')

	for i in range(len(names)):
		if isinstance(names[i], bytes):
			names[i] = names[i].decode()

	LMLogger.info("names: %s", names)

	if species is None:
		species_nums  = [0]
		species_names = [0]
	else:
		species_nums  = [0] * len(species)
		species_names = [0] * len(species)

		for i, s in enumerate(species):
			# Get species name for plotting
			species_names[i] = s

			# Pull out array for faster access
			traces[:, i] = np.array(sim[str(replicate).rjust(7, '0')]['SpeciesCounts'][:, names.index(s)])

	fig = plt.figure(1)
	axs = plt.gca()

	# Plot the line graph of mean vs. time.
	plt.xlabel("Time (s)")
	plt.ylabel("Number")

	for i in range(traces.shape[1]):
		axs.plot(times, traces[:, i], **kwargs)

	# Add a legend if the species number is > 1
	if hasattr(species, '__iter__') and len(species) > 1:
		plt.legend(species)

	# Save the plot
	if filename is not None:
		plt.savefig(filename, orientation = 'portrait')

	# Return a handle to the figure for customization
	return fig


# ######################################
# Average/Standard Deviation Functions #
# ######################################

[docs]
def showAvgVarFromFile(filename, species, **kwargs):
	"""Show species from an output file
    Args:
        filename:
            The name of an HDF5 output file generated by LatticeMicrobes
        species:
            A list of species to plot
        kwargs:
            Additional arguments to be passed on to matplotlib.plot.  These are any arguments that are valid with matplotlib.plot
    """
	# Open the HDF5 output data file
	f=openLMFile(filename)
	fig=showAvgVar(f, species, **kwargs)
	plt.show()
	closeLMFile(f)



[docs]
def plotAvgVarFromFile(filename, species, outfile, **kwargs):
	"""Plot species from an output file

    Args:
        filename:
            The name of an HDF5 output file generated by LatticeMicrobes
        species:
            A list of species to plot
        kwargs:
            Additional arguments to be passed on to matplotlib.plot.  These are any arguments that are valid with matplotlib.plot
        outfile:
            A filename to plot to
    """
	# Open the HDF5 output data file
	f=openLMFile(filename)
	plotAvgVar(f, species=species, filename=outfile, **kwargs)
	closeLMFile(f)
	plt.clf()



[docs]
def showAvgVar(f, species, **kwargs):
	"""Show a specific species average over time and variance

    Args:
        f:
            An h5py object handle
        species:
            A list of species to show
        kwargs:
            Additional arguments to be passed on to matplotlib.plot.  These are any arguments that are valid with matplotlib.plot
    """
	fig = plotAvgVar(f,species=species,filename=None, **kwargs)
	plt.show()



[docs]
def plotAvgVar(f, species = None, filename = None, **kwargs):
	"""Plot a specific species average over time and variance

    Args:
        f:
            An h5py object handle
        species:
            A list of species to show
        filename:
            A filename to print to (default None, gives the same behavior as showAvgVar(...))
        kwargs:
            Additional arguments to be passed on to matplotlib.plot.
			These are any arguments that are valid with matplotlib.plot

    Returns:
        A handle to the figure object created which allows customization of plot attributes
    """

	# Get a handle to the Simulations group, where results are stored
	sim = f['Simulations']

	# Count the number of replicates that were run
	replicates = len(sim)

	if (replicates == 0):
		LMLogger.warning("No simulations have been performed.")
		raise Exception("Could not return timesteps, no simulations have been performed.")

	# Get the list of timesteps from the first replicate
	# SpeciesCountTimes is a 1D vector
	times = np.array(sim[next(iter(sim))]['SpeciesCountTimes'])

	# Initialize the array to store the mean count
	means     = np.zeros((len(times), len(species)))
	variances = np.zeros((len(times), len(species)))

	par = f['Parameters']

	if 'SpeciesNames' in par:
		namesDS = par['SpeciesNames'][:]
		names = namesDS.tolist()
		names = [spec[0] for spec in names]
	else:
		# Compatible with LM trajectory generated < 2.5
		names = par.attrs['speciesNames'].decode().split(',')

	for i in range(len(names)):
		if isinstance(names[i], bytes):
			names[i] = names[i].decode()

	LMLogger.info("names: %s", names)

	if species is None:
		species_nums  = [0]
		species_names = [0]
	else:
		species_nums  = [0] * len(species)
		species_names = [0] * len(species)

		for i, s in enumerate(species):
			species_nums[i]  = names.index(s)
			species_names[i] = s

			# Get the mean and variacne for the specie
			means[:, i], variances[:, i], _ = getAvgVarTrace(f, s)

	# Actually create a figure handl
	fig = plt.figure(1)

	plt.subplot(211)

	# Plot the line graph of mean vs. time.
	plt.ylabel("E")

	for i in range(means.shape[1]):
		plt.plot(times, means[:, i], **kwargs)

	# Plot the variance
	plt.subplot(212)
	plt.xlabel("Time (s)")
	plt.ylabel("Var")

	for i in range(variances.shape[1]):
		plt.plot(times, variances[:, i], **kwargs)

	# Add a legend if the species number is > 1
	if len(species) > 1:
		plt.legend(species)

	# Save the plot
	if filename is not None:
		plt.savefig(filename, orientation = 'portrait')
	
	# Return a handle to the figure for customization
	return fig


# ####################
# 2D and 3D Plotting #
# ####################

[docs]
def getOccupancyKymograph(f, species=None, replicate=1):
	"""Compute the specie density(occupancy) among a slice of the simulation domain as a function over time for the given direciton

    Args:
        filename:
            Name of file to extract data from
        specie:
            A particular specie to plot density for
        replicate:
            The replicate to show trace for
    """
	# Get particle type
	h5=openLMFile(f)
	par=h5['Parameters']
	if 'SpeciesNames' in par:
		namesDS = par['SpeciesNames'][:]
		names = namesDS.tolist()
		names = [spec[0] for spec in names]
	else:
		# Compatible with LM trajectory generated < 2.5
		names = par.attrs['speciesNames'].decode().split(',')
# 	names=h5['Parameters'].attrs['speciesNames'].decode('utf8').split(',')
	partType=0
	if species not in names:
		LMLogger.error("Specie: %s was not in file: %s", species, f)
		return
	for n in names:
		if n == species:
			break
		partType += 1
	# Make sure it is an RDMESimulation
	if 'Lattice' not in h5['Model']['Diffusion']:
		LMLogger.error('%s may not be an RDME file, bailing out.'%(f))
		return

	h5.close()

	# Get the simulation file
	sim = lm.SimulationFile(f)

	# Get diffusion model
	LMLogger.info("Getting old Lattices")
	dm = lm.DiffusionModel()
	sim.getDiffusionModel(dm) # Pulls DM out and puts it in the local object

	# Get the times
	times = sim.getLatticeTimes(replicate)
	timesteps = len(times)

	# Get number of sites in each direction
	sitesX = dm.lattice_x_size()
	sitesY = dm.lattice_y_size()
	sitesZ = dm.lattice_z_size()
	delX = dm.lattice_spacing()
	pps = dm.particles_per_site()

	# Get the last lattice
	LMLogger.info("Building new Lattice")

	lattice     = lm.ByteLattice(sitesX, sitesY, sitesZ, delX, pps)
	siteLattice = lm.ByteLattice(sitesX, sitesY, sitesZ, delX, pps)
	sim.getDiffusionModelLattice(dm, siteLattice)

	# Data structures
	kymograph=np.ndarray(shape=(timesteps,sitesZ))


	# loop through lattice
	LMLogger.info("Looping through times")
	MIN=1.0e16
	MAX=0.0
	for t in range(timesteps):
		LMLogger.info("timestep: %d" %(t))
		sim.getLattice(replicate, t, lattice)
		normer=np.zeros(sitesZ)
		for z in range(sitesZ):
			for y in range(sitesY):
				for x in range(sitesX):
					normer[z] += 1.0
					for p in range(8):
						if lattice.getParticle(x,y,z,p) == 0:
							break;
						if lattice.getParticle(x,y,z,p) == partType:
							kymograph[t,z] += 1.0
		# Find the min and max and also normalize the kymograph by the occupancy
		for i in range(sitesZ):
			if normer[i] != 0.0:
				kymograph[t,i] /= normer[i]
				if kymograph[t,i] < MIN:
					MIN=kymograph[t,i]
				if kymograph[t,i] > MAX:
					MAX=kymograph[t,i]
			else:
				kymograph[t,i] = 0.0

	return kymograph



[docs]
def plotOccupancyKymograph(f, species=None, replicate=1, filename=None):
	"""Plot a specific specie density(occupancy) among a slice of the simulation domain as a function of a direction over time

    Args:
        f:
            The name of the LM file
        specie:
            A particular specie to plot density for
        filename:
            A filename to print to (default None, gives the same behavior as showAvgVar(...))
        replicate:
            The replicate to show trace for the kymograph from the file
    """
	kymograph = getOccupancyKymograph(f, species, replicate)

	# Do the plotting
	fig = plt.figure()
	plt.imshow(kymograph)	
	# Save the plot
	if filename is not None:
		plt.savefig(filename, orientation='portrait')

	return fig




[docs]
def showOccupancyKymograph(f, species=None, replicate=1):
	"""Show a specific specie density(occupancy) among a slice of the simulation domain as a function of a direction over time

    Args:
        f:
            An h5py object handle
        specie:
            A particular specie to plot density for
        replicate:
            The replicate to show trace for
    """
	fig = plotOccupancyKymograph(f, species, replicate)
	plt.show()



[docs]
def plotPhaseSpace(f, species=None, replicate=1, withHistogram=False):
	"""Plot the 2D or 3D phase space associated with the given species over the replicates indicated

    Args:
        f:
            An h5py object handle
        specie:
            An iterable of 2 or 3 specie names indicating whether to plot in 2D or 3D space
        replicate:
            The replicate to show trace for
        withHistogram:
            If set to true, a heatmap of the phase space over all replicates will be plotted in the background.  (NOTE: this only works in 2D.)
    """
	dimensions = len(species)
	if dimensions > 3 or dimensions < 2:
		raise Exception("Phase spaces can only be plotted for 2 or 3 species.")
	if withHistogram and dimensions == 3:
		raise Exception("Histogram plot can only be created for 2D phase space.")

	# Get the phase space for the specified species	
	phaseSpace = getPhaseSpace(f, species, replicate)

	# Do the color/contour map if needed
	fig = plt.figure()
	if withHistogram:
		histPhase, edges = getHistogram(f, species)
		xs,ys = np.meshgrid(edges[0],edges[1])
		# Draw contours
		levels = np.arange(min(histPhase),max(histPhase),5)	
		norm = cm.colors.Normalize(vmax=histPhase.max(), vmin=histPhase.min())
		cmap = cm.PRGn
		contours = plt.contourf(xs,ys,histPhase, levels, cmap=cm.get_cmap(cmap, len(levels)-1), norm=norm)
		# Draw contour lines
		countrouLevels = plt.contour(xs,ys, histPhase, contours.levels, colors='k', hold='on')
		

	# Plot the trace
	if dimensions == 3:
		ax = fig.gca(projection='3d')
		ax.plot(phaseSpace[:,0],phaseSpace[:,1],phaseSpace[:,2])
		ax.set_xlabel(species[:,0])
		ax.set_ylabel(species[1])
		ax.set_zlabel(species[2])
	elif dimensions == 2:
		ax = fig.gca()
		ax.plot(phaseSpace[0],phaseSpace[1])
		ax.set_xlabel(species[0])
		ax.set_ylabel(species[1])

	return fig	



# ##############################################################
# Functions to extract particular features from the simulation #
# ##############################################################


[docs]
def getSpecies(f):
    """Exract the species names

    Args:
        f:
            The HDF5 file handle to extract from or the name of a file to open
    """
    # Open file if a string is passed
    wasString = False
    if isinstance(f, str):
        f = openLMFile(f)
        wasString = True
        
    par=f['Parameters']
    
    # Get a handle a copy of the species names
    #species = f['Parameters'].attrs['speciesNames'].decode('utf8').split(",")
   
    if 'SpeciesNames' in par:
        # New version (>= 2.5)
        speciesDS = par['SpeciesNames'][:]
        species = [spec[0] for spec in speciesDS.tolist()]
    else:
        # Compatible with LM trajectory generated < 2.5
        species = par.attrs['speciesNames'].decode('utf8').split(",")
    
    # close file if need be
    if wasString:
        closeLMFile(f)
    
    return species



[docs]
def getTimesteps(f):
	"""Extract the timestep times

    Args:
        f:
            The HDF5 file handle to extract from or the name of a file to open

    Returns:
        The timestep times in a numpy array
    """
	# Open file if a string is passed
	wasString = False
	if isinstance(f, str):
		f = openLMFile(f)
		wasString = True

	# Get a handle to the Simulations group, where results are stored
	sim=f['Simulations']
	
	# Count the number of replicates that were run
	replicates=len(sim)
	if(replicates == 0):
		LMLogger.warning("No simulations have been performed.")
		if wasString:
			closeLMFile(f)
		raise Exception("Could not return timesteps, no simulations have been performed.")
	times=np.array(sim[next(iter(sim))]['SpeciesCountTimes'])

	# close file if need be
	if wasString:
		closeLMFile(f)

	return times



[docs]
def getSpecieTrace(f, specie, replicate = None, doublingTime = None):
	"""Extract data for a particular specie for the specified replicate(s)

    Args:
        f:
            The HDF5 file handle to extract from or the name of a file to open
        specie:
            The specie to extract data 
        replicate:
            The number of the replicate to extract from. If None (default), returns data for all replicates
        doublingTime:
            An optional doubling time parameter that will normalize average each trace by 
            time in the cell cycle assuming exponentially growing cell 
            (2log2 * x/2^(t/DT)), effectively normalizing against cell size. 
            Default: no averaging is performed

    Returns:
        If replicate is specified: The species time trace in a numpy array
        If replicate is None: A dictionary with replicate numbers as keys and time traces as values
    """

	# Open file if a string is passed
	wasString = False

	if isinstance(f, str):
		f = openLMFile(f)
		wasString = True

	# Get a handle to the Simulations group, where results are stored
	sim = f['Simulations']
	par = f['Parameters']
	
	# Count the number of replicates that were run
	replicates = len(sim)

	if (replicates == 0):
		LMLogger.warning("No simulations have been performed.")

		if wasString:
			closeLMFile(f)

		raise Exception("Could not return timesteps, no simulations have been performed.")
	
	if 'SpeciesNames' in par:
		# New version (>= 2.5)
		namesDS = par['SpeciesNames'][:]
		names = namesDS.tolist()
		names = [spec[0] for spec in names]
	else:
		# Compatible with LM trajectory generated < 2.5
		names = par.attrs['speciesNames'].decode('utf8').split(',')

	for i in range(len(names)):
		if isinstance(names[i], bytes):
			names[i] = names[i].decode()

	if specie not in names:
		LMLogger.error("Species: %s was not found in list: %s", specie, names)
		raise Exception("Species not found: ", specie)
	
	LMLogger.info("names: %s", names)

	# Get species number
	specieIdx = names.index(specie)

	# If no specific replicate requested, return all replicates
	if replicate is None:
		allTraces = {}
		for repNum in range(1, replicates + 1):
			repKey = str(repNum).zfill(7)
			if repKey in sim:
				speciesCountHandle = sim[repKey]['SpeciesCounts']
				trace = np.array(speciesCountHandle[:, specieIdx])
				
				# Average if necessary
				if doublingTime != None:
					times = getTimesteps(f)
					for i, t in enumerate(times):
						tmod = t % doublingTime
						trace[i] = 2.0 * np.log(2.0) * trace[i] / (2.0 ** (tmod / doublingTime))
				
				allTraces[repNum] = trace
		
		# close file if need be
		if wasString:
			closeLMFile(f)
		
		return allTraces
	
	# Single replicate case (original logic)
	speciesCountHandle = sim[str(replicate).zfill(7)]['SpeciesCounts']
	trace = np.array(speciesCountHandle[:, specieIdx])

	# Average if necessary
	if doublingTime != None:
		times = getTimesteps(f)

		for i, t in enumerate(times):
			tmod = t % doublingTime
			trace[i] = 2.0 * np.log(2.0) * trace[i] / (2.0 ** (tmod / doublingTime))

	# close file if need be
	if wasString:
		closeLMFile(f)

	# pull out trace
	return trace



[docs]
def getAvgVarTrace(f, specie, doublingTime = None):
	"""Get the average and variance of the specie trace over time

    Args:
        f:
            The HDF5 file handle to extract from or the name of a file to open
        specie:
            The specie to extract
        doublingTime:
            An optional doubling time parameter that will normalize average each trace by time 
            in the cell cycle assuming exponentially growing cell (2log2 * x/2^(t/DT)), 
            effectively normalizing against cell size. Default: no averaging is performed

    Returns:
        avg, var, time
    """
	# Open file if a string is passed
	wasString = False
	if isinstance(f, str):
		f = openLMFile(f)
		wasString = True

	# Get a handle of the simulation group where results are stored
	sim=f['Simulations']

	# Count replicates
	replicates=len(sim)
	if(replicates == 0):
		LMLogger.warning("No simulations have been performed.")
		if wasString:
			closeLMFile(f)
		raise Exception("Could not return timesteps, no simulations have been performed.")
	
	# Grab the times
	times = getTimesteps(f)

	# Grab each specie trace into average
	lenT = len(times)
	data = np.zeros((lenT,replicates),dtype=float)
	for i in range(1,replicates+1):
		# Don't pass doublingTime here because it would cause 'getSpecieTrace'
		#  to load the times for every call, which could be very costly
		data[:,i-1] = getSpecieTrace(f, specie, i) 

	# Average if necessary
	if doublingTime != None:
		for i,t in enumerate(times):
			tmod = t % doublingTime
			data[i,:] = 2.0*np.log(2.0)*data[i,:] / 2.0**(tmod/doublingTime)

	# Compute the average/variance/etc.
	avg = np.average(data,axis = 1)
	var = np.var(data,axis = 1)

	# close file if need be
	if wasString:
		closeLMFile(f)

	return avg, var, times




[docs]
def getHistogram(f, species):
	"""Get the histogram for a specie

    Args:
        f:
            The HDF5 file handle to extract from or the name of a file to open
        specie:
            An array of specie/s to extract. returns a nD histogram of the data.

    Returns:
        bins, edges (len(bins)+1)
    """
	# Open file if a string is passed
	wasString = False
	if isinstance(f, str):
		f = openLMFile(f)
		wasString = True

	# Get a handle of the simulation group where results are stored
	sim=f['Simulations']

	# Count replicates
	replicates=len(sim)
	if(replicates == 0):
		LMLogger.warning("No simulations have been performed.")
		if wasString:
			closeLMFile(f)

	# Grab the times
	times = getTimesteps(f)

	# Grab each specie trace into average
	bins = None
	edges = None
	lenT = len(times)
	if len(species) == 1:
		data = np.zeros((lenT,replicates),dtype=int)
		for i in range(1,replicates+1):
			data[:,i-1] = getSpecieTrace(f, species[0], i) 

		# Compute the histogram for the specie
		bins, edges = np.histogram(data, bins=np.ptp(data), density=True)
	elif len(species) == 2:
		data1 = np.zeros((lenT,replicates),dtype=int)
		for i in range(1,replicates+1):
			data1[:,i-1] = getSpecieTrace(f, species[0], i) 
		data = np.zeros((lenT,replicates),dtype=int)
		for i in range(1,replicates+1):
			data2[:,i-1] = getSpecieTrace(f, species[1], i) 

		# Compute the histogram for the specie
		bins, edges = np.histogram2d(data1.flatten(), data2.flatten(), bins=(np.ptp(data1), np.ptp(data2)), normed=True)
	else:
		data = np.zeros( (replicates*lenT, len(species)) )
		for i in range(1, replicates+1):
			for j,s in enumerate(species):
				rep = replicates - 1
				data[rep*lenT:((rep+1)*lenT-1), j] = getSpecieTrace(f, s, i)

		bins = np.amax(data,axis=1) - np.amin(data,axis=1)
		bins, edges = np.histogramdd(data, bins=bins, normed=True)

	# Close file if need be
	if wasString:
		closeLMFile(f)

	return bins, edges

	

[docs]
def getPhaseSpace(f, species, replicate):
	"""Get the nD phase space associated with the traces of species. 
    
    If a single replicate is specified, a single trace will be returned, otherwise a 2/3D density matrix will be returned.
    
    Args:
        f:
            The HDF5 file handle to extract from or the name of a file to open
        specie:
            An iterable of 2 more specie names
        replicate:
            The replicate for which to extract the phase space

    Returns:
        A numpy array of a trace from a single replicate (specie1, specie2, ...) 
    """
	dimension = -1
	if len(species) < 2:
		raise Exception("Phase space must be either 2 or 3 dimensional.")
	dimension = len(species)

	# Open file if a string is passed
	wasString = False
	if isinstance(f, str):
		f = openLMFile(f)
		wasString = True

	# Determine whether we are to obtain just a specie trace or a histogram in that many dimensions
	# Compute just a trace
	times = getTimesteps(f)
	lenT = len(times)
	# Grab each specie trace into average
	data = np.zeros((lenT,dimension),dtype=float)
	for i,s in enumerate(species):
		data[:,i] = getSpecieTrace(f, s, replicate)

	# Close file if need be
	if wasString:
		closeLMFile(f)

	return data



[docs]
def getNumTrajectories(f):
	"""Get the number of trajectories (replicates) in the file
	
	Args:
		f:
			The HDF5 file handle to extract from or the name of a file to open
	
	Returns:
		int: Number of trajectories/replicates in the file
	"""
	# Open file if a string is passed
	wasString = False
	if isinstance(f, str):
		f = openLMFile(f)
		wasString = True
	
	# Get a handle to the Simulations group, where results are stored
	sim = f['Simulations']
	
	# Count replicates
	replicates = len(sim)
	
	# close file if need be
	if wasString:
		closeLMFile(f)
	
	return replicates



[docs]
def getTrajectory(f, replicate, specie):
	"""Get trajectory data for a specific replicate and species
	
	Args:
		f:
			The HDF5 file handle to extract from or the name of a file to open
		replicate:
			The replicate number (0-indexed for compatibility with your usage)
		specie:
			The species name to extract
	
	Returns:
		numpy.ndarray: The species trajectory data
	"""
	# Convert from 0-indexed to 1-indexed replicate numbering
	replicate_num = replicate + 1
	
	# Use existing getSpecieTrace function
	return getSpecieTrace(f, specie, replicate_num)