Difference between revisions of "Public:CharacterModelObjectModel"

From Phyloinformatics
Jump to: navigation, search
 
(One intermediate revision by one other user not shown)
Line 1: Line 1:
 +
Currently different programmers are coming up with their own model definition languages for character substitution models. This is anticipated to cause problems for users in the future, impeding ready exchange and rapid validation of models across the array of programs that apply such models.
 +
 +
Here we develop an interface definition language that encapsulates current (and future) substitution models. Our planned long-term efforts are:
 +
* Add wrappers for Cipres and Hyphy after the meeting
 +
* Creating own instance of an IDL for describing statistical models
 +
* Create and test an XML format that applications can exchange (e.g. HyPhy, PAUP, MrBayes, BEAST, CIPRES, Mesquite, Garli)
 +
 
<pre>
 
<pre>
typedef string ID; // used for fields that identify unique objects (and is hashable)
+
typedef long UID; // used for fields that identify unique objects (and is hashable)
 +
typedef long ParamID;
 +
typedef long ExprID;
  
 +
module CipresIDL_api2
 +
{
 
/* Describes the specific category of an alphabet or which general category it  
 
/* Describes the specific category of an alphabet or which general category it  
 
falls under.
 
falls under.
Line 17: Line 28:
 
     };
 
     };
  
typedef Sequence<int> AmbiguityCode;
+
typedef sequence<long> AmbiguityCode;
 
struct DatatypeTransformationRule
 
struct DatatypeTransformationRule
 
{
 
{
Sequence< AmbiguityCode > mapping; // -1 means "drop this state"
+
sequence< AmbiguityCode > mapping; // -1 means "drop this state"
 
};
 
};
  
 
struct Datatype
 
struct Datatype
 
{
 
{
DatatypeEnum datatypeEnum;
+
DatatypeEnum datatypeEnumVar;
ID id; /*  unique identifier */
+
UID id; /*  unique identifier */
Sequence<string> stateNames;  /* state labels (or "0", "1"... if the type is generic)*/
+
sequence<string> stateNames;  /* state labels (or "0", "1"... if the type is generic)*/
Sequence<DatatypeTransformationRule> transformationRules;
+
sequence<DatatypeTransformationRule> transformationRules;
 
};
 
};
  
 
enum ParamScopeEnum
 
enum ParamScopeEnum
 
{
 
{
GLOBAL, SUBSET, EDGE, EDGE_SUBSET;
+
GLOBAL, SUBSET, EDGE, EDGE_SUBSET
 
};
 
};
  
 
enum ParamValueEnum
 
enum ParamValueEnum
 
{
 
{
MARGINALIZED, FIXED, TO_ESTIMATE, ESTIMATED;
+
MARGINALIZED, FIXED_VALUE, TO_ESTIMATE, ESTIMATED
 
};
 
};
  
 
enum BoundTypeEnum
 
enum BoundTypeEnum
 
{
 
{
NONE, OPEN, CLOSED;
+
NONE, OPEN, CLOSED
 
};
 
};
 
 
Line 66: Line 77:
 
enum OpCodeEnum  
 
enum OpCodeEnum  
 
{
 
{
PARAMETER, NEGATE, ADDITION, DIVISION, MULTIPLICATION, SUBTRACTION;
+
PARAMETER_VALUE, NEGATE, ADDITION, DIVISION, MULTIPLICATION, SUBTRACTION
 
     };
 
     };
 
      
 
      
Line 72: Line 83:
 
{
 
{
 
OpCodeEnum opCode;
 
OpCodeEnum opCode;
IDString paramRef; // id of the parameter  
+
UID paramRef; // id of the parameter  
 
};
 
};
 
 
 
struct Expression
 
struct Expression
 
{
 
{
ExprIDString id;
+
ExprID id;
Sequence<ExpressionTerm> expr; // reverse polish notation
+
sequence<ExpressionTerm> expr; // reverse polish notation
 
};
 
};
  
 
struct ProbabilityDensity
 
struct ProbabilityDensity
 
{
 
{
+
sequence<double> densityParams;
 
};
 
};
  
Line 89: Line 100:
 
{
 
{
 
ProbabilityDensity dist;
 
ProbabilityDensity dist;
Sequence<ParamIDString> parameters;
+
sequence<ParamID> parameters;
 
};
 
};
  
 
enum ParamConstraintEnum
 
enum ParamConstraintEnum
 
{
 
{
SUM_EQUALS_VALUE, MEAN_EQUALS_VALUE;
+
SUM_EQUALS_VALUE, MEAN_EQUALS_VALUE
 
};
 
};
 
 
 
struct ParameterConstraint
 
struct ParameterConstraint
 
{
 
{
ParamConstraintEnum paramConstraintEnum;
+
ParamConstraintEnum paramConstraintEnumVar;
 
double value;
 
double value;
Sequence<ParamIDString> parameters;
+
sequence<ParamID> parameters;
 
};
 
};
  
Line 109: Line 120:
 
SYMM_COMPONENT_MATRIX, //  
 
SYMM_COMPONENT_MATRIX, //  
 
MIXTURE
 
MIXTURE
;
 
 
};
 
};
 +
 +
struct DiscreteCharacterModel;
 +
 +
typedef sequence<DiscreteCharacterModel> DiscreteCharacterModelSeq;
  
 
struct DiscreteCharacterModel
 
struct DiscreteCharacterModel
 
{
 
{
IDString datatypeReference; /* Describes the type of data that the model can be applied to.
+
UID datatypeReference; /* Describes the type of data that the model can be applied to.
 
a datatype object can be one of the common types or a
 
a datatype object can be one of the common types or a
 
description of constraints (e.g. 4-state)  */
 
description of constraints (e.g. 4-state)  */
 
boolean isReversible;
 
boolean isReversible;
 
 
Sequence<Expression> expressions;  
+
sequence<Expression> expressions;  
Sequence<Parameter> parameters;
+
sequence<Parameter> parameters;
  
 
ModelDescriptionEnum descriptionType;
 
ModelDescriptionEnum descriptionType;
Line 126: Line 140:
  
 
/** used if descriptionType == Q_MATRIX */
 
/** used if descriptionType == Q_MATRIX */
Sequence< Sequence<ExprIDString> > qMatrix;  
+
sequence< sequence<ExprID> > qMatrix;  
  
 
/** used if descriptionType == SYMM_COMPONENT_MATRIX */
 
/** used if descriptionType == SYMM_COMPONENT_MATRIX */
Sequence< Sequence<ExprIDString> > symmComponentOfQMatrix;  
+
sequence< sequence<ExprID> > symmComponentOfQMatrix;  
Sequence<ExprIDString> equilStateFreq;
+
sequence<ExprID> equilStateFreq;
  
Sequence<ParameterConstraint> constraints;
+
sequence<ParameterConstraint> constraints;
Sequence<Prior> priors;
+
sequence<Prior> priors;
 
 
 
/** used if descriptionType == MIXTURE */
 
/** used if descriptionType == MIXTURE */
Sequence<DiscreteCharacterModel> subModels;
+
DiscreteCharacterModelSeq subModels;
Sequence<ParamIDString> mixtureProportions;
+
sequence<ParamID> mixtureProportions;
 
Prior mixtureProportionPrior;
 
Prior mixtureProportionPrior;
 
 
ExprIDString rateMultiplier;
+
ExprID rateMultiplier;
 
 
 
};
 
};
 +
};
  
 
</pre>
 
</pre>

Latest revision as of 19:23, 12 January 2007

Currently, different programmers are each devising their own model-definition languages for character substitution models. This is expected to cause problems for users in the future, because it impedes the ready exchange and rapid validation of models across the array of programs that apply such models.

Here we develop an interface definition language that encapsulates current (and future) substitution models. Our planned long-term efforts are:

  • Add wrappers for CIPRES and HyPhy after the meeting
  • Create our own instance of an IDL for describing statistical models
  • Create and test an XML format that applications can exchange (e.g. HyPhy, PAUP, MrBayes, BEAST, CIPRES, Mesquite, Garli)
// Identifier typedefs. They are declared at global scope, before the module,
// and all alias the IDL `long` type.

// Generic identifier (e.g. Datatype::id, DiscreteCharacterModel::datatypeReference).
typedef long UID; // used for fields that identify unique objects (and is hashable)
// Identifier of a Parameter (Parameter::id); also the element type of the
// parameter lists in Prior, ParameterConstraint and DiscreteCharacterModel.
typedef long ParamID; 
// Identifier of an Expression (Expression::id); also the element type of the
// qMatrix, symmComponentOfQMatrix and equilStateFreq fields.
typedef long ExprID;

module CipresIDL_api2
{
/*
 * Category of an alphabet: either a specific alphabet (DNA, RNA, ...) or the
 * general category that the data falls under.
 */
enum DatatypeEnum
	{
		DNA_DATATYPE,           // states A, C, G, T
		RNA_DATATYPE,           // states A, C, G, U
		AA_DATATYPE,            // presumably amino acids -- not stated in the original
		CODON_DATATYPE,         // nucleotide triplets: AAA, AAC, AAG, AAT, ...

		CATEGORICAL_DATATYPE,   // generic code for discrete characters ("Standard" in NEXUS)
		CONTINUOUS_DATATYPE,
		DISTANCE_DATATYPE,      // taxon-to-taxon distances (distances block in NEXUS)
		RESTRICTION_FRAGMENT
	};

// A list of long codes. Used as the element type of
// DatatypeTransformationRule::mapping, where -1 means "drop this state".
// NOTE(review): presumably the codes are state indices that one (possibly
// ambiguous) symbol may stand for -- confirm.
typedef sequence<long> AmbiguityCode;
/* A rule for transforming data of one datatype; Datatype carries a list of
   these in Datatype::transformationRules.
   NOTE(review): presumably `mapping` holds one AmbiguityCode per source
   state -- the indexing convention is not stated here; confirm. */
struct DatatypeTransformationRule
	{
		sequence< AmbiguityCode > mapping; // -1 means "drop this state"
	};

/* Description of a datatype: its category, a unique id, its state labels and
   the transformation rules attached to it.
   NOTE(review): DiscreteCharacterModel::datatypeReference is also a UID and
   presumably refers to `id` below -- confirm. */
struct Datatype
	{
		DatatypeEnum datatypeEnumVar; /* category of the alphabet */
		UID id; /*  unique identifier */
		sequence<string> stateNames;  /* state labels (or "0", "1"... if the type is generic)*/
		sequence<DatatypeTransformationRule> transformationRules; /* rules attached to this datatype */
	};

/* Scope of a model parameter; this is the type of Parameter::scope.
   NOTE(review): the enumerators presumably distinguish parameters shared
   globally, per character subset, per tree edge, and per edge-and-subset --
   not spelled out in this file; confirm. */
enum ParamScopeEnum
	{
		GLOBAL,
		SUBSET,
		EDGE,
		EDGE_SUBSET
	};

/* How Parameter::value is to be interpreted; this is the type of
   Parameter::valueMeaning.
   NOTE(review): exact semantics of each enumerator are not documented in
   this file -- confirm before relying on them. */
enum ParamValueEnum
	{
		MARGINALIZED,
		FIXED_VALUE,
		TO_ESTIMATE,
		ESTIMATED
	};

/* Kind of bound; this is the type of Bound::type.
   NOTE(review): presumably NONE = unbounded, OPEN = exclusive limit,
   CLOSED = inclusive limit -- confirm. */
enum BoundTypeEnum
	{
		NONE,
		OPEN,
		CLOSED
	};
	
/* One limit of a parameter's range; Parameter holds a `lower` and an `upper`
   Bound. */
struct Bound
	{
		BoundTypeEnum   type;	// kind of bound (NONE, OPEN or CLOSED)
		double			value;	// the limit itself; presumably ignored when type == NONE -- confirm
	};
	
/* A single model parameter: identity, scope, current value and the bounds on
   that value. Other structures refer to parameters through ParamID values. */
struct Parameter
	{
		ParamID id;						// identifier of this parameter
		string name;					// label of the parameter
		ParamScopeEnum scope;			// scope of the parameter (see ParamScopeEnum)
		ParamValueEnum valueMeaning;	// how `value` is to be interpreted (see ParamValueEnum)

		double value;
		Bound  lower;					// lower limit on `value`
		Bound  upper; 					// upper limit on `value`
	};

/* Operation codes for ExpressionTerm::opCode. Expressions are stored in
   reverse polish notation (see Expression::expr).
   NOTE(review): PARAMETER_VALUE presumably denotes an operand, i.e. the value
   of the parameter named by ExpressionTerm::paramRef -- confirm. */
enum OpCodeEnum
	{
		PARAMETER_VALUE,
		NEGATE,
		ADDITION,
		DIVISION,
		MULTIPLICATION,
		SUBTRACTION
	};
    
/* One term of an Expression. Expression::expr is a sequence of these terms in
   reverse polish notation. */
struct ExpressionTerm
	{
		OpCodeEnum	opCode;		// what this term is (see OpCodeEnum)
		ParamID		paramRef;	// id of the parameter (matches Parameter::id). Typed ParamID for
								// consistency with every other parameter reference; ParamID and
								// UID both alias `long`, so the representation is unchanged.
								// Presumably only meaningful when opCode == PARAMETER_VALUE -- confirm.
	};
	
/* A named arithmetic expression over parameters. Other structures refer to
   expressions through ExprID values (e.g. the entries of
   DiscreteCharacterModel::qMatrix). */
struct Expression
	{
		ExprID	id;						// identifier of this expression
		sequence<ExpressionTerm> expr; // reverse polish notation
	};

/* A probability density, described only by a list of numeric parameters.
   NOTE(review): nothing in this struct identifies which family of
   distribution the numbers belong to -- confirm how consumers are expected
   to interpret densityParams. */
struct ProbabilityDensity
	{
		sequence<double> densityParams;	// numeric parameters of the density
	};

/* A prior: a probability density together with the ids of the parameters it
   is attached to. */
struct Prior
	{
		ProbabilityDensity dist;		// the density
		sequence<ParamID> parameters;	// ids of the parameters (cf. Parameter::id)
	};

/* Kind of constraint; this is the type of
   ParameterConstraint::paramConstraintEnumVar.
   NOTE(review): presumably the sum (or mean) of the constrained parameters
   must equal ParameterConstraint::value -- confirm. */
enum ParamConstraintEnum
	{
		SUM_EQUALS_VALUE,
		MEAN_EQUALS_VALUE
	};
	
/* A constraint over a group of parameters: the kind of constraint, the target
   value, and the ids of the parameters involved. */
struct ParameterConstraint
	{
		ParamConstraintEnum paramConstraintEnumVar;	// kind of constraint (see ParamConstraintEnum)
		double value;								// the value named by SUM_EQUALS_VALUE / MEAN_EQUALS_VALUE
		sequence<ParamID> parameters;				// ids of the constrained parameters (cf. Parameter::id)
	};

/* Selects which group of DiscreteCharacterModel fields describes the model;
   this is the type of DiscreteCharacterModel::descriptionType. */
enum ModelDescriptionEnum
	{
		// all expressions are in the qMatrix field of the DiscreteCharacterModel
		Q_MATRIX,
		// the symmComponentOfQMatrix and equilStateFreq fields are used
		SYMM_COMPONENT_MATRIX,
		// the subModels and mixtureProportions fields are used
		MIXTURE
	};

// Forward declaration: DiscreteCharacterModel contains a sequence of itself
// (its subModels field), so the sequence type must be named before the
// struct is defined.
struct DiscreteCharacterModel;

// Sequence of models; the type of DiscreteCharacterModel::subModels.
typedef sequence<DiscreteCharacterModel> DiscreteCharacterModelSeq;

/* A substitution model for discrete characters.
   The model owns its expressions and parameters; the matrix fields refer to
   the expressions by ExprID. descriptionType selects which group of fields
   below carries the model description (Q_MATRIX, SYMM_COMPONENT_MATRIX or
   MIXTURE). */
struct DiscreteCharacterModel
	{
		UID			datatypeReference; /* Describes the type of data that the model can be applied to.
											a datatype object can be one of the common types or a
											description of constraints (e.g. 4-state).
											NOTE(review): presumably the Datatype::id of that datatype -- confirm. */
		boolean 	isReversible;
		
		sequence<Expression> expressions; 	/* expressions referenced by ExprID in the fields below */
		sequence<Parameter>	parameters;		/* parameters referenced by ParamID */

		ModelDescriptionEnum descriptionType;	/* selects which group of fields below is used */
		

			/** used if descriptionType == Q_MATRIX */
		sequence< sequence<ExprID> > qMatrix; 	/* matrix of expression ids (sequence of sequences) */

			/** used if descriptionType == SYMM_COMPONENT_MATRIX */
		sequence< sequence<ExprID> > 	symmComponentOfQMatrix; 
		sequence<ExprID>				equilStateFreq;

			/* NOTE(review): constraints and priors sit under the SYMM_COMPONENT_MATRIX
			   heading but are presumably used for every descriptionType -- confirm. */
		sequence<ParameterConstraint> constraints;
		sequence<Prior>	priors;
		
			/** used if descriptionType == MIXTURE */
		DiscreteCharacterModelSeq subModels;
		sequence<ParamID>	mixtureProportions;	/* parameter ids; presumably one per entry of subModels -- confirm */
		Prior	mixtureProportionPrior;
		
			/* NOTE(review): unclear from this file whether rateMultiplier is
			   MIXTURE-only or applies to every descriptionType -- confirm. */
		ExprID rateMultiplier;
		
	};
};