Saturday, September 24, 2011

FASTQ splitter

This is a simple Python script that splits a FASTQ file into n FASTQ files, with the reads distributed evenly among those files.


#!/usr/bin/python
import os
import sys
import math
# A FASTQ record is assumed to occupy exactly four lines: header, sequence,
# "+" separator and quality string.  Records are located by line position
# rather than by a leading "@", because "@" is also a legal quality character
# and a quality line may therefore start with it.
LINES_PER_READ = 4


def split_fastq(input_path, output_dir, files_count):
    """Split a FASTQ file into ``files_count`` files of near-equal size.

    The output files are named "0", "1", ... inside ``output_dir`` and
    contain the reads in their original order.  When the number of reads is
    not divisible by ``files_count`` the first files receive one extra read
    each, so file sizes differ by at most one read.  Exactly ``files_count``
    files are always created; some are empty when there are fewer reads
    than files.

    Args:
        input_path: Path of the FASTQ file to split (four lines per read).
        output_dir: Directory for the output files; created if missing.
        files_count: Number of output files to produce (at least 1).

    Raises:
        ValueError: If ``files_count`` is smaller than 1.
        OSError: If ``output_dir`` cannot be created or is not a directory.
    """
    if files_count < 1:
        raise ValueError("number of output files must be at least 1")

    # First pass: count lines only, so the file is never held in memory.
    with open(input_path) as source:
        line_count = sum(1 for _ in source)

    # Round up so that a truncated trailing record still counts as a read
    # and its lines are not dropped.
    reads = (line_count + LINES_PER_READ - 1) // LINES_PER_READ
    base, extra = divmod(reads, files_count)

    try:
        os.mkdir(output_dir)
    except OSError:
        # An already existing directory is fine; anything else is an error.
        if not os.path.isdir(output_dir):
            raise

    # Second pass: hand out consecutive runs of lines to each output file.
    with open(input_path) as source:
        for file_index in range(files_count):
            quota = base + (1 if file_index < extra else 0)
            remaining = quota * LINES_PER_READ
            target = os.path.join(output_dir, str(file_index))
            with open(target, "w") as output:
                while remaining > 0:
                    line = source.readline()
                    if not line:
                        # End of input (only possible on a truncated record).
                        break
                    output.write(line)
                    remaining -= 1


def main(argv):
    """Run the splitter from command-line arguments; return an exit code."""
    if len(argv) < 4:
        sys.stderr.write(
            "usage: python split.py path_to_FASTQ_file output_directory "
            "number_of_output_files\n"
        )
        return 2
    split_fastq(argv[1], argv[2], int(argv[3]))
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))

Usage

python split.py "path_to_FASTQ_file" "output_directory" "number_of_output_files"


No comments:

Post a Comment