tools.addNoise
A little utility to add some noise to training data.
Warning
This tool will be removed in BPReveal 6.0. It turns out that it’s not very useful.
BNF
<add-noise-configuration> ::= { "input-h5" : <file-name>, "output-h5" : <file-name>, <output-size-section>, "keep-original-data" : <boolean>, "sequence-fraction-mutated" : <number>, "sequence-distribution-fraction" : <number>, "profile-mutation-types" : [<list-of-profile-mutation-types>], "num-threads" : <integer>, <verbosity-section> }
<output-size-section> ::= "num-output-samples" : <integer> | "output-size-ratio" : <number>
<list-of-profile-mutation-types> ::= <profile-mutation-type> | <profile-mutation-type>, <list-of-profile-mutation-types>
<profile-mutation-type> ::= <add-mutation-type> | <shift-mutation-type> | <subtract-mutation-type>
<add-mutation-type> ::= { "type" : "add", "maximum-reads" : <number-or-null>, "minimum-reads" : <integer>, "max-change" : <integer>, <profile-mutation-common-parameters> }
<shift-mutation-type> ::= { "type" : "shift", "shift-max-distance" : <integer>, "shift-reads-independently" : <boolean>, <profile-mutation-common-parameters> }
<subtract-mutation-type> ::= { "type" : "subtract", "maximum-reads" : <number-or-null>, "minimum-reads" : <integer>, "max-change" : <integer>, <profile-mutation-common-parameters> }
<profile-mutation-common-parameters> ::= "fraction-mutated" : <number>, "output-distribution-fraction" : <number>
Parameter notes
- input-h5
The name of the hdf5 file generated by prepareTrainingData.
- output-h5
The name of the output file that will be generated.
- num-output-samples
How many training examples do you want in the output file? Mutually exclusive with output-size-ratio.
- output-size-ratio
How many times larger should the output be than the input? Mutually exclusive with num-output-samples.
- keep-original-data
Should the original data be kept in the output, or just the altered data?
- sequence-fraction-mutated
What fraction of the input bases should be randomly mutated? For example, 0.05 means that one in twenty bases will be randomly mutated to a different base.
- profile-mutation-types
A list of the types of mutation you want to apply to the profile outputs.
- fraction-mutated
Set inside each entry of profile-mutation-types. What fraction of the output bases should be mutated?
- bpreveal.tools.addNoise.main(config)
Run the program.
- Parameters:
config (dict) – The configuration json.
Schema
{
"$schema": "http://json-schema.org/draft-07/schema#",
"title": "addNoise",
"description": "Schema for :py:mod:addNoise<bpreveal.tools.addNoise>",
"definitions": {
"add-mutation-type": {
"type": "object",
"properties": {
"type": {
"const": "add"
},
"maximum-reads": {
"oneOf": [
{
"type": "number",
"minimum": 0
},
{
"const": null
}
]
},
"minimum-reads": {
"type": "integer",
"minimum": 0
},
"max-change": {
"type": "integer"
},
"fraction-mutated": { "$ref": "/schema/base#/definitions/fraction"},
"output-distribution-fraction": { "$ref": "/schema/base#/definitions/fraction"}
},
"additionalProperties": false,
"required": [
"type",
"maximum-reads",
"minimum-reads",
"max-change",
"fraction-mutated",
"output-distribution-fraction"
]
},
"subtract-mutation-type": {
"type": "object",
"properties": {
"type": {
"const": "subtract"
},
"maximum-reads": {
"oneOf": [
{
"type": "number",
"minimum": 1
},
{
"const": null
}
]
},
"minimum-reads": {
"type": "integer",
"minimum": 1
},
"max-change": {
"type": "integer"
},
"fraction-mutated": { "$ref": "/schema/base#/definitions/fraction"},
"output-distribution-fraction": { "$ref": "/schema/base#/definitions/fraction"}
},
"additionalProperties": false,
"required": [
"type",
"maximum-reads",
"minimum-reads",
"max-change",
"fraction-mutated",
"output-distribution-fraction"
]
},
"shift-mutation-type": {
"type": "object",
"properties": {
"type": {
"const": "shift"
},
"shift-max-distance": {
"type": "integer",
"minimum": 1
},
"shift-reads-independently": {
"type": "boolean"
},
"fraction-mutated": { "$ref": "/schema/base#/definitions/fraction"},
"output-distribution-fraction": { "$ref": "/schema/base#/definitions/fraction"}
},
"additionalProperties": false,
"required": [
"type",
"shift-max-distance",
"shift-reads-independently",
"fraction-mutated",
"output-distribution-fraction"
]
}
},
"type": "object",
"properties": {
"input-h5": {
"type": "string"
},
"output-h5": {
"type": "string"
},
"num-output-samples": {
"type": "integer"
},
"output-size-ratio": {
"type": "number"
},
"keep-original-data": {
"type": "boolean"
},
"sequence-fraction-mutated": {
"type": "number",
"minimum": 0,
"maximum": 1
},
"sequence-distribution-fraction": {
"type": "number",
"minimum": 0,
"maximum": 1
},
"profile-mutation-types": {
"type": "array",
"items": {
"anyOf": [
{
"$ref": "#/definitions/add-mutation-type"
},
{
"$ref": "#/definitions/subtract-mutation-type"
},
{
"$ref": "#/definitions/shift-mutation-type"
}
]
}
},
"num-threads": {
"type": "integer"
},
"verbosity": {
"$ref": "/schema/base#/definitions/verbosity"
}
},
"required": [
"input-h5",
"output-h5",
"keep-original-data",
"sequence-fraction-mutated",
"sequence-distribution-fraction",
"profile-mutation-types",
"num-threads",
"verbosity"
],
"oneOf": [
{
"required": [
"num-output-samples"
],
"not": {
"required": [
"output-size-ratio"
]
}
},
{
"required": [
"output-size-ratio"
],
"not": {
"required": [
"num-output-samples"
]
}
}
]
}