This is a simple Python script for splitting a FASTQ file into n FASTQ files, with the reads evenly distributed among those files.
Usage
python split.py "path_to_FASTQ_file" "output_directory" "number_of_output_files"
#!/usr/bin/python
import os
import sys
import math
# A FASTQ record is assumed to span exactly four lines (header, sequence,
# "+" separator, quality).  Records are delimited by line position rather
# than by a leading "@", because "@" is also a valid first character of a
# quality line and would otherwise be mistaken for a new record header.
LINES_PER_RECORD = 4


def count_reads(fastq_path):
    """Return the number of four-line records in the file at fastq_path.

    A trailing group of fewer than four lines is counted as one (truncated)
    record so that none of the input is dropped by the splitter.
    """
    with open(fastq_path) as handle:
        line_count = sum(1 for _ in handle)
    return (line_count + LINES_PER_RECORD - 1) // LINES_PER_RECORD


def _copy_lines(source, sink, line_count):
    """Copy up to line_count lines from source to sink; return lines copied."""
    copied = 0
    while copied < line_count:
        line = source.readline()
        if not line:  # input ended early (truncated final record)
            break
        sink.write(line)
        copied += 1
    return copied


def split_fastq(input_path, output_dir, files_count):
    """Split a FASTQ file into files_count files with evenly spread reads.

    Output files are named "0", "1", ... inside output_dir, which is created
    if it does not exist.  Reads keep their original order; when the read
    count is not divisible by files_count, the first files each receive one
    extra read.  Exactly files_count files are written, so some are empty
    when the input holds fewer reads than files.

    Args:
        input_path: path of the FASTQ file to split (four lines per record).
        output_dir: directory that receives the output files.
        files_count: number of output files to produce; must be >= 1.

    Returns:
        A list with the number of reads assigned to each output file.

    Raises:
        ValueError: if files_count is smaller than 1.
        OSError: if output_dir cannot be created or a file cannot be opened
            (IOError on Python 2 for file-open failures).
    """
    if files_count < 1:
        raise ValueError("number_of_output_files must be at least 1")

    total_reads = count_reads(input_path)
    base, extra = divmod(total_reads, files_count)

    try:
        os.makedirs(output_dir)
    except OSError:
        # An already existing directory is fine; anything else is a real error.
        if not os.path.isdir(output_dir):
            raise

    reads_per_file = []
    with open(input_path) as source:
        for file_index in range(files_count):
            # The first `extra` files absorb the remainder, one read each.
            quota = base + (1 if file_index < extra else 0)
            out_path = os.path.join(output_dir, str(file_index))
            with open(out_path, "w") as sink:
                _copy_lines(source, sink, quota * LINES_PER_RECORD)
            reads_per_file.append(quota)
    return reads_per_file


def main(argv):
    """Run the splitter from command-line arguments; return an exit status."""
    if len(argv) < 4:
        sys.stderr.write(
            'Usage: python split.py "path_to_FASTQ_file" '
            '"output_directory" "number_of_output_files"\n'
        )
        return 2
    split_fastq(argv[1], argv[2], int(argv[3]))
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))
Usage
python split.py "path_to_FASTQ_file" "output_directory" "number_of_output_files"