tools.addNoise

A little utility to add some noise to training data.

Warning

This tool will be removed in BPReveal 6.0. It turns out that it’s not very useful.

BNF

<add-noise-configuration> ::=
    {
        "input-h5" : <file-name>,
        "output-h5" : <file-name>,
        <output-size-section>,
        "keep-original-data" : <boolean>,
        "sequence-fraction-mutated" : <number>,
        "sequence-distribution-fraction" : <number>,
        "profile-mutation-types" : [<list-of-profile-mutation-types>],
        "num-threads" : <integer>,
        <verbosity-section>
    }
<output-size-section> ::=
    "num-output-samples" : <integer>
  | "output-size-ratio" : <number>
<list-of-profile-mutation-types> ::=
    <profile-mutation-type>
  | <profile-mutation-type>, <list-of-profile-mutation-types>
<profile-mutation-type> ::=
    <add-mutation-type>
  | <shift-mutation-type>
  | <subtract-mutation-type>
<add-mutation-type> ::=
    {
        "type" : "add",
        "maximum-reads" : <number-or-null>,
        "minimum-reads" : <integer>,
        "max-change" : <integer>,
        <profile-mutation-common-parameters>
    }
<shift-mutation-type> ::=
    {
        "type" : "shift",
        "shift-max-distance" : <integer>,
        "shift-reads-independently" : <boolean>,
        <profile-mutation-common-parameters>
    }
<subtract-mutation-type> ::=
    {
        "type" : "subtract",
        "maximum-reads" : <number-or-null>,
        "minimum-reads" : <integer>,
        "max-change" : <integer>,
        <profile-mutation-common-parameters>
    }
<profile-mutation-common-parameters> ::=
     "fraction-mutated" : <number>,
     "output-distribution-fraction" : <number>

Parameter notes

input-h5

The name of the hdf5 file generated by prepareTrainingData.

output-h5

The name of the output file that will be generated.

num-output-samples

How many training examples do you want in the output file? Mutually exclusive with output-size-ratio.

output-size-ratio

How many times larger should the output be than the input? Mutually exclusive with num-output-samples.

keep-original-data

Should the original data be kept in the output, or just the altered data?

sequence-fraction-mutated

What fraction of the input bases should be randomly mutated? For example, 0.05 means that one in twenty bases will be randomly mutated to a different base.

profile-mutation-types

A list of the types of mutation you want to apply to the profile outputs.

fraction-mutated

Given inside each profile mutation type. What fraction of the output bases should be mutated?

bpreveal.tools.addNoise.main(config)

Run the program.

Parameters:

config (dict) – The configuration json.

Schema

{
    "$schema": "http://json-schema.org/draft-07/schema#",
    "title": "addNoise",
    "description": "Schema for :py:mod:addNoise<bpreveal.tools.addNoise>",
    "definitions": {
        "add-mutation-type": {
            "type": "object",
            "properties": {
                "type": {
                    "const": "add"
                },
                "maximum-reads": {
                    "oneOf": [
                        {
                            "type": "number",
                            "minimum": 0
                        },
                        {
                            "const": null
                        }
                    ]
                },
                "minimum-reads": {
                    "type": "integer",
                    "minimum": 0
                },
                "max-change": {
                    "type": "integer"
                },
                "fraction-mutated": { "$ref": "/schema/base#/definitions/fraction"},
                "output-distribution-fraction":  { "$ref": "/schema/base#/definitions/fraction"}
            },
            "additionalProperties": false,
            "required": [
                "type",
                "maximum-reads",
                "minimum-reads",
                "max-change",
                "fraction-mutated",
                "output-distribution-fraction"
            ]
        },
        "subtract-mutation-type": {
            "type": "object",
            "properties": {
                "type": {
                    "const": "subtract"
                },
                "maximum-reads": {
                    "oneOf": [
                        {
                            "type": "number",
                            "minimum": 1
                        },
                        {
                            "const": null
                        }
                    ]
                },
                "minimum-reads": {
                    "type": "integer",
                    "minimum": 1
                },
                "max-change": {
                    "type": "integer"
                },
                "fraction-mutated":  { "$ref": "/schema/base#/definitions/fraction"},
                "output-distribution-fraction":  { "$ref": "/schema/base#/definitions/fraction"}
            },
            "additionalProperties": false,
            "required": [
                "type",
                "maximum-reads",
                "minimum-reads",
                "max-change",
                "fraction-mutated",
                "output-distribution-fraction"
            ]
        },
        "shift-mutation-type": {
            "type": "object",
            "properties": {
                "type": {
                    "const": "shift"
                },
                "shift-max-distance": {
                    "type": "integer",
                    "minimum": 1
                },
                "shift-reads-independently": {
                    "type": "boolean"
                },
                "fraction-mutated":  { "$ref": "/schema/base#/definitions/fraction"},
                "output-distribution-fraction":  { "$ref": "/schema/base#/definitions/fraction"}
            },
            "additionalProperties": false,
            "required": [
                "type",
                "shift-max-distance",
                "shift-reads-independently",
                "fraction-mutated",
                "output-distribution-fraction"
            ]
        }
    },
    "type": "object",
    "properties": {
        "input-h5": {
            "type": "string"
        },
        "output-h5": {
            "type": "string"
        },
        "num-output-samples": {
            "type": "integer"
        },
        "output-size-ratio": {
            "type": "number"
        },
        "keep-original-data": {
            "type": "boolean"
        },
        "sequence-fraction-mutated": {
            "type": "number",
            "minimum": 0,
            "maximum": 1
        },
        "sequence-distribution-fraction": {
            "type": "number",
            "minimum": 0,
            "maximum": 1
        },
        "profile-mutation-types": {
            "type": "array",
            "items": {
                "anyOf": [
                    {
                        "$ref": "#/definitions/add-mutation-type"
                    },
                    {
                        "$ref": "#/definitions/subtract-mutation-type"
                    },
                    {
                        "$ref": "#/definitions/shift-mutation-type"
                    }
                ]
            }
        },
        "num-threads": {
            "type": "integer"
        },
        "verbosity": {
            "$ref": "/schema/base#/definitions/verbosity"
        }
    },
    "required": [
        "input-h5",
        "output-h5",
        "keep-original-data",
        "sequence-fraction-mutated",
        "sequence-distribution-fraction",
        "profile-mutation-types",
        "num-threads",
        "verbosity"
    ],
    "oneOf": [
        {
            "required": [
                "num-output-samples"
            ],
            "not": {
                "required": [
                    "output-size-ratio"
                ]
            }
        },
        {
            "required": [
                "output-size-ratio"
            ],
            "not": {
                "required": [
                    "num-output-samples"
                ]
            }
        }
    ]
}