Upset plot:进阶版Venn plot实现多元素可视化

if (!requireNamespace("BiocManager", quietly = TRUE))
    install.packages("BiocManager")
BiocManager::install("UpSetR")
library(UpSetR)
pathway=read.table("carp_combined_id1.xls",header=T,sep="\t")
upset(pathway,nsets = 9, nintersects = 50,mb.ratio = c(0.5, 0.5),
      order.by = "freq", decreasing = c(TRUE,FALSE),
      queries = list(list(query=intersects, params=list("datra", "caaur","meamb","onmac","ctide","cycar","sigra","sians","sirhi"), color="red", active=T)))
#详细参数查看官网
#行列转置awk '{for(i=0;++i<=NF;)a[i]=a[i]?a[i] FS $i:$i}END{for(i=0;i++ carp_combined_absentid_convert.txt

R画折线图

library(ggplot2)
library(gcookbook)
data <- read.table("test.xls",header=T,sep="\t")
ggplot(data,aes(x=Species, y=CDS))+ geom_line(color="red") + geom_point(size=2, shape=16,color="red") + ylim(0,1750) + theme_classic() + scale_x_discrete(limits=c("darer","datra","ctide","meamb","onmac","cycar","caaur","sirhi","sians","sigra"))
#scale_x_discrete修改x轴坐标,不改变图的数值
#theme_classic显示X、Y轴线

多物种全基因组比对得到保守的DNA序列

查阅不同文献和教程发现获得多物种保守的DNA序列(编码区和非编码区)主要过程有:
1.Repeat mask:通过RepeatMasker和RepeatModeler获得
2.Pairwise alignment: 用到的软件主要有last、lastz、blastz
3.Chaining: axtChain
4.Netting: chainNet
5.Mafing:
6.Combine multiple pairwise results:
7.PhastCons: PHAST

详细步骤如下

第一步:从数据库下载重复序列屏蔽后的基因组fasta文件,对自己组装的序列可以通过Geta获得
第二步:前边介绍过last的使用,但看文献发现使用lastz的比较多,有关last和lastz的比较(last aligner is considered faster and memory efficient. It creates maf file, which can converted to psl files. Then the same following processes can be used on psl files. Different from lastz, last aligner starts with fasta files. The target genome sequence has to build the index file first, and then align with the query genome sequence.),操作上last使用起来更加简单,参数选择较少,目前还不知道两者结果的异同(服务器正在运行,结果出来更新)。lastz和blastz的不同
lastz数据预处理:

for i in `cat 00.initial_data/reflect.txt | cut -f 1`
do
    echo "faToTwoBit 00.initial_data/$i.genome.fasta 00.initial_data/$i.genome.2bit"
done > fa2bit.list
ParaFly -c fa2bit.list -CPU 19

for i in `cat 00.initial_data/reflect.txt | cut -f 1`
do
    echo "twoBitInfo 00.initial_data/$i.genome.2bit stdout | sort -k2rn > $i.chrom.sizes"
done > chrom.sizes.list
ParaFly -c chrom.sizes.list -CPU 19
for i in `cat 00.initial_data/reflect.txt | cut -f 1`
do
    mkdir ${i}PartList
done
for i in `cat 00.initial_data/reflect_no_darer.txt | cut -f 1`
do
    echo "/opt/biosoft/userApps/kent/src/hg/utils/automation/partitionSequence.pl 10000000 0 00.initial_data/$i.genome.2bit $i.chrom.sizes 1 -lstDir ${i}PartList > $i.part.list"
done > query_partitionSequence.list
ParaFly -c query_partitionSequence.list -CPU 18
/opt/biosoft/userApps/kent/src/hg/utils/automation/partitionSequence.pl 20000000 10000 00.initial_data/darer.genome.2bit darer.chrom.sizes 1 -lstDir darerPartList > darer.part.list
grep -v PartList darer.part.list > darer.list
for i in `cat 00.initial_data/reflect_no_darer.txt | cut -f 1`
do
    echo "grep -v PartList $i.part.list > $i.list"
done > 1111.lsit
ParaFly -c 1111.lsit -CPU 18
for i in `cat 00.initial_data/reflect_no_darer.txt | cut -f 1`
do
    echo "cat ${i}PartList/*.lst >> $i.list"
done > cat_Part.list
ParaFly -c cat_Part.list -CPU 18
/opt/biosoft/userApps/kent/src/hg/utils/automation/constructLiftFile.pl darer.chrom.sizes darer.list > darer.lift
for i in `cat 00.initial_data/reflect_no_darer.txt | cut -f 1`
do
    echo "/opt/biosoft/userApps/kent/src/hg/utils/automation/constructLiftFile.pl $i.chrom.sizes $i.list > $i.lift"
done > constructLiftFile.list
ParaFly -c constructLiftFile.list -CPU 18
for i in `cat 00.initial_data/reflect.txt | cut -f 1`
do
    mkdir $i
    for x in `cat $i.list`
    do
        y=${x/*2bit:/}
        echo "twoBitToFa $x $i/$y.fa"
    done >> twoBitToFa.list
done
#去除长度小于1000bp的序列
for i in `cat 00.initial_data/reflect_no_darer.txt | cut -f 1`
do
    for x in $i/*fa
    do
        y=${x/*-/}
        k=${y/.fa/}
        if [ $k -le 1000 ]
        then
            rm $x
        fi

    done
done

ParaFly -c twoBitToFa.list -CPU 80
for i in darer/*fa
do
    for x in `cat 00.initial_data/reflect_no_darer.txt | cut -f 1`
    do
        for y in $x/*fa
        do
            echo "lastz $i $y --strand=both --seed=12of19 --notransition --chain --gapped --gap=400,30 --hspthresh=3000 --gappedthresh=3000 --inner=2000 --masking=50 --ydrop=9400 --scores=/opt/biosoft/GenomeAlignmentTools/HoxD55.q --format=axt > ${x}_axt/$i.$y.axt"
        done >> lastz_all.list
    done
done

#lastz的运行速度太慢,若分析的物种太多没有超算不推荐使用

第三步:Chaining,将相邻的block连接起来,打分矩阵和blastz相同,gap打分改变

for i in `cat 00.initial_data/reflect_no_darer.txt | cut -f 1`
do
    echo "axtChain -linearGap=loose -psl $i.psl darer.genome.2bit $i.genome.2bit $i.Todarer.chain"
done > chain_axtChain.list
ParaFly -c chain_axtChain.list -CPU 18

第四步:Netting:chainNet,对target序列确定最优比对序列。
1.首先将所有的染色体或scaffold的碱基标记未用的。
2.按打分由高到低排列,形成列表。
3.迭代:每次从列表中取出一个chain,扔掉与已经存在的chain有overlap的区域,余下的部分添加上去,如果和之前的chain有gap,标记成子集,通过这种方式形成的层级称为net。记录overlap的区域,用于下一步识别重复。

chainMergeSort $output_dir/3.chain/*.chain > $output_dir/4.prenet/all.chain
chainPreNet $output_dir/4.prenet/all.chain $output_dir/$tn.sizes $output_dir/$qn.sizes $output_dir/4.prenet/all_sort.chain
chainNet $output_dir/4.prenet/all_sort.chain $output_dir/$tn.sizes $output_dir/$qn.sizes $output_dir/5.net/temp.tn $output_dir/5.net/temp.qn
netSyntenic $output_dir/5.net/temp.tn $output_dir/5.net/$tn.net
netSyntenic $output_dir/5.net/temp.qn $output_dir/5.net/$qn.net

第五步:Mafing

netToAxt $output_dir/5.net/$tn.net $output_dir/4.prenet/all_sort.chain $output_dir/$tn.2bit $output_dir/$qn.2bit $output_dir/6.net_to_axt/all.axt
axtSort $output_dir/6.net_to_axt/all.axt $output_dir/6.net_to_axt/all_sort.axt
axtToMaf -tPrefix=$tn -qPrefix=$qn $output_dir/6.net_to_axt/all_sort.axt $output_dir/$tn.sizes $output_dir/$qn.sizes $output_dir/7.maf/all.maf

第六步:Combine multiple pairwise results:

roast + E=darer tree.txt ./*Final.maf all.roast.maf

PSMC分析流程

bowtie2-build ../genome.fasta genome
bowtie2 -x genome -p 80 -1 reads.1.fastq -2 reads.2.fastq -S bowtie2.sam
samtools sort -o bowtie2_sort.bam -O BAM -@ 40 -m 4G bowtie2.sam
/opt/biosoft/samtools-0.1.18/samtools mpileup -C50 -uf ../genome.fasta bowtie2_sort.bam > gc_psmc.bcf
/opt/biosoft/samtools-0.1.18/bcftools/bcftools view -c gc_psmc.bcf > Pb_2G.vcf
vcfutils.pl vcf2fq -d 10 -D 100 Pb_2G.vcf | gzip > diploid.fq.gz
/opt/biosoft/psmc-master/utils/fq2psmcfa -q20 diploid.fq.gz > diploid.psmcfa
/opt/biosoft/psmc-master/utils/splitfa diploid.psmcfa > split.psmcfa
/opt/biosoft/psmc-master/psmc -N25 -t15 -r5 -p "4+25*2+4+6" -o diploid.psmc diploid.psmcfa
seq 100 | xargs -i echo /opt/biosoft/psmc-master/psmc -N25 -t15 -r5 -b -p "4+25*2+4+6" -o round-{}.psmc split.fa | sh
cat diploid.psmc round-*.psmc > combined.psmc
/opt/biosoft/psmc-master/utils/psmc_plot.pl -g x -u y combined combined.psmc
# x为该物种繁殖一代的时间,比如人的默认为25年,该处值为-g 25.
# y为该物种碱基替换率,可由进化树的枝长除以r8s评估该物种的分歧时间得到

使用AdmixTools做D-statistics

安装软件和缺少的库文件

git clone https://github.com/DReichLab/AdmixTools.git
cd AdmixTools/src
make clobber
make all
#如果报错/usr/bin/ld: cannot find -lopenblas说明缺少libopenblas库文件
git clone https://github.com/xianyi/OpenBLAS.git
cd OpenBLAS
make
make PREFIX=/path/to/your/installation install
cd /usr/lib/
ln -s /opt/biosoft/OpenBLAS/lib/libopenblas_nehalemp-r0.3.10.dev.a ./libopenblas.a
ln -s /opt/biosoft/OpenBLAS/lib/libopenblas_nehalemp-r0.3.10.dev.so ./libopenblas.so
cd /opt/biosoft/AdmixTools/src/
make clean
make all && make install

根据gff文件统计exon、intron长度分布图

下载需要的脚本和安装Python模块

wget https://github.com/irusri/Extract-intron-from-gff3/archive/master.zip
unzip master.zip
rm master.zip && cd Extract-intron-from-gff3-master/scripts/
sudo chmod 755 *
pip install misopy
pip install gffutils

获取exon、intron的gff文件并提取DNA序列

python /opt/biosoft/Extract-intron-from-gff3-master/scripts/extract_intron_gff3_from_gff3.py out.gff3 out_intron.gff3
##结果文件out_intron.gff3_introns.gff3
awk '/intron\t/{print}' out_intron.gff3_introns.gff3 | sort -k 1,1 -k4,2n   > processed_intron.gff3
awk '/exon\t/{print}' out_intron.gff3_introns.gff3 | sort -k 1,1 -k4,2n   > processed_exon.gff3
perl /opt/biosoft/Extract-intron-from-gff3-master/scripts/extract_seq_from_gff3.pl -d out.tmp/genome.fasta - processed_intron.gff3 > output_intron.fa
perl /opt/biosoft/Extract-intron-from-gff3-master/scripts/extract_seq_from_gff3.pl -d out.tmp/genome.fasta - processed_exon.gff3 > output_exon.fa
##对序列重命名
genome_seq_clear.pl output_intron.fa --seq_prefix intron --min_length 1 --line_length -1 > output_intron_rename.fa
genome_seq_clear.pl output_exon.fa --seq_prefix exon --min_length 1 --line_length -1 > output_exon_rename.fa
##获得每条序列的长度信息
samtools faidx output_intron_rename.fa
cut -f 1,2 output_intron_rename.fa.fai > output_intron_length.txt
samtools faidx output_exon_rename.fa
cut -f 1,2 output_exon_rename.fa.fai > output_exon_length.txt
##R画图
library(ggplot2)
library(patchwork)
data <- read.table("output_gene_length.txt",header=T,check.names=F)
p1 <- ggplot(data,aes(length))+ geom_line(stat="density",color="red")+theme_classic()+theme(axis.title = element_text(size=24),axis.text = element_text(size = 22,color = "black"))
p1

使用Last比对基因组DNA序列

LAST can:

Handle big sequence data, e.g:
Compare two vertebrate genomes
Align billions of DNA reads to a genome
Indicate the reliability of each aligned column.
Use sequence quality data properly.
Compare DNA to proteins, with frameshifts.
Compare PSSMs to sequences
Calculate the likelihood of chance similarities between random sequences.
Do split and spliced alignment.
Train alignment parameters for unusual kinds of sequence (e.g. nanopore).

安装

wget http://last.cbrc.jp/last-1080.zip
unzip last-1080.zip
cd last-1080
make && make install
cd .. && rm last-1080.zip

使用流程:

lastdb -P0 -uMAM4 -R01 darer-MAM4 ../01.proteomics/darer.genome.fasta
#-P 设置线程数,0调用服务器所有线程
#-u 该参数的选择是last比对非常关键的一步,常用参数:
##MAM8:This DNA seeding scheme finds weak similarities with high sensitivity, but low speed and high memory usage (e.g. ~50 GB for mammal genomes).
##MAM4:This DNA seeding scheme is like MAM8, but a bit less sensitive, and uses about half as much memory.
##NEAR:This DNA seeding scheme is good for finding short-and-strong (near-identical) similarities. It is also good for similarities with many gaps (insertions and deletions), because it can find the short matches between the gaps. (Long-and-weak seeding schemes allow for mismatches but not gaps.) 
##YASS:This DNA seeding scheme is good for finding long-and-weak similarities. It is a good compromise for both protein-coding and non protein-coding DNA
#-R01 tells it to mark simple sequences (such as cacacacacacacacaca) by lowercase, but not suppress them.
for i in `cat ../00.initial_data/reflect.txt | cut -f 1`
do
    echo "last-train -P0 --revsym --matsym --gapsym -E0.05 -C2 darer-MAM4 ../01.proteomics/$i.genome.fasta > $i.mat"
done > last_all_pairs_mat.list
ParaFly -c last_all_pairs_mat.list -CPU 25

for i in `cat ../00.initial_data/reflect.txt | cut -f 1`
do
    echo "lastal -P0 -m100 -E0.05 -C2 -p $i.mat darer-MAM4 ../01.proteomics/$i.genome.fasta | last-split -m1 > $i.maf"
done > last_all_pairs_maf.list
ParaFly -c last_all_pairs_maf.list -CPU 25

maf-swap datra.maf | awk '/^s/ {$2 = (++s % 2 ? "datra." : "darer.") $2} 1' | last-split -m1 | maf-swap > datra-2.maf
last-postmask datra-2.maf | maf-convert -n tab | awk -F'=' '$2 <= 1e-5' > datra.tab
#lastdb -P0 -uNEAR -cR11 darer.fa.db ../01.proteomics/darer.genome.fasta
#for i in `cat ../00.initial_data/reflect.txt | cut -f 1`
#do
#    echo "lastal -P20 -m100 -E0.05 darer.fa.db ../01.proteomics/$i.genome.fasta | last-split -m1 > $i.maf"
#done > last_all_pairs_maf.list
#ParaFly -c last_all_pairs_maf.list -CPU 8
#for i in `cat ../00.initial_data/reflect.txt | cut -f 1`
#do
#    echo "maf-swap $i.maf | last-split | maf-swap | last-split | maf-sort > $i.LAST.maf"
#done > last_maf_swap.list
#ParaFly -c last_maf_swap.list -CPU 25
#for i in `cat ../00.initial_data/reflect.txt | cut -f 1`
#do
#    echo "maf-convert psl $i.LAST.maf > $i.psl"
#done > last_maf_convert.list
#ParaFly -c last_maf_convert.list -CPU 25
#for i in `cat ../00.initial_data/reflect.txt | cut -f 1`
#do
#    echo "perl maf.rename.species.S.pl $i.LAST.maf darer $i $i.Final.maf > $i.stat"
#done > last_maf_rename.list
#ParaFly -c last_maf_rename.list -CPU 25

#for i in `cat ../00.initial_data/reflect.txt | cut -f 1`
#do
#    echo "cp $i.Final.maf darer.$i.sing.maf"
#done > cp.lsit
#ParaFly -c lcp.list -CPU 25

roast T=/home/wuchangsong/gc_genome/19.paml/i.LAST/ E=darer "tree topology" ./*Final.maf all.roast.maf
#Then the output file example.roast.maf will contain the orthologous multiple alignment.

参考链接:https://github.com/mcfrith/last-genome-alignments

PHAST安装使用

PHAST:Phylogenetic Analysis with Space/Time models (PHAST) is a freely available software package consisting of a collection of command-line programs and supporting libraries for comparative and evolutionary genomics. Best known as the search engine behind the Conservation tracks in the University of California, Santa Cruz (UCSC) Genome Browser, PHAST also includes several tools for phylogenetic modeling, functional element identification, as well as utilities for manipulating alignments, trees and genomic annotations.

Major PHAST programs:

phastCons: Conservation scoring and identification of conserved elements
phyloFit: Fitting of phylogenetic models to aligned DNA sequences
phyloP: Computation of p-values for conservation or acceleration, either lineage-specific or across all branches
phastOdds: Log-odds scoring for phylogenetic models or phylo-HMMs
exoniphy: Phylogenetic exon prediction
dless: Prediction of elements under lineage-specific selection
prequel: Probabilistic reconstruction of ancestral sequences
phastBias: Identification of GC-biased gene conversion using a phylo-HMM

安装PHAST:

wget -c http://www.netlib.org/clapack/clapack.tgz
tar -xvzf clapack.tgz
cd CLAPACK-3.2.1
cp make.inc.example make.inc && make f2clib && make blaslib && make lib
wget -c http://compgen.cshl.edu/phast/downloads/phast.v1_5.tgz
tar zxf phast.v1_5.tgz
cd phast/src/
make CLAPACKPATH=/opt/biosoft/CLAPACK-3.2.1
echo 'PATH=$PATH:/opt/biosoft/phast/bin' >> ~/.bashrc
source ~/.bashrc

使用MCScanX和JCVI共线性区块分析

首先从基因组数据库下载目标物种的全基因组文件和gff3文件
对gff3文件预处理得到对应格式的gff和bed文件

MCScanX:perl -e 'while (<>) { if (m/^(\S+)\t.*\tgene\t(\d+)\t(\d+).*ID=(darer\d+)/) { print "$1\t$4\t$2\t$3\n" } }' darer.genome.gff3 > darer.gff
jcvi:perl -e 'while (<>) { if (m/^(\S+)\t.*\tgene\t(\d+)\t(\d+)\t(\S+)\t(\S+).*ID=(darer\d+)/) { print "$1\t$2\t$3\t$6\t$4\t$5\n" } }' darer.genome.gff3 > zebr.bed

对NCBI的GFF3文件再进行提取,若基因含有可变剪接,则仅保留CDS长度最长的转录本并获得蛋白序列,该过程参考网上其他教程

cat darer.gff gc.gff > all.gff
cat darer.pep.fasta gc.pep.fasta > all.pep.fasta
makeblastdb -in all.pep.fasta -dbtype prot -title all -parse_seqids -out all -logfile all.log
blast.pl blastp all all.pep.fasta 1e-10 160 blast 6
mkdir MCScanx
mv ../blast.tab all.blast
mv ../all.gff ./
MCScanX all
cd ..
mkdir jcvi && cd jcvi
mv ../*.bed ./
mv ../darer.pep.fasta darer.pep
mv ../gc.pep.fasta gc.pep
python -m jcvi.compara.catalog ortholog --dbtype prot --no_strip_names darer gc
python -m jcvi.compara.synteny screen --minspan=30 --simple darer.gc.anchors darer.gc.anchors.new

LACHESIS安装及使用

在组装基因组时,使用二代或三代数据组装到contigs后,下一步就是将contig提升到染色体水平。如果利用HiC数据,那么目前常见的组装软件有下面几个:

HiRise: 2015年后的GitHub就不再更新
LACHESIS: 发表在NBT,2017年后不再更新
SALSA: 发表在BMC genomics, 仍在更新中
3D-DNA: 发表在science,仍在更新中
ALLHiC: 发表在Nature Plants, 用于解决植物多倍体组装问题

对于二倍体物种而言,目前3D-DNA应该是组装效果较好的一个软件。

软件安装
Lachesis有两个依赖:samtools(低于0.1.19的版本)和C++的boost库(需要大于1.52.0但是又不能太高比如1.67.0就不行)

curl -o LACHESIS.zip https://codeload.github.com/shendurelab/LACHESIS/legacy.zip/master
unzip LACHESIS.zip
mv shendurelab-LACHESIS-2e27abb LACHESIS
cd LACHESIS
export LACHESIS_BOOST_DIR=/opt/biosoft/boost_1_53_0 #boost_1_64_0版本使用时出现问题
export LACHESIS_SAMTOOLS_DIR=/opt/biosoft/samtools-0.1.18
./configure --with-samtools=/opt/biosoft/samtools-0.1.18 --with-boost=/opt/biosoft/boost_1_53_0/
make -j 40
mv src/bin .
mv src/Lachesis bin/

如果Perl版本不是 5.14.2 ,需要打开bin下面的perl脚本,删除如下信息:
///////////////////////////////////////////////////////////////////////////////
// //
// This software and its documentation are copyright (c) 2014-2015 by Joshua //
// N. Burton and the University of Washington. All rights are reserved. //
// //
// THIS SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS //
// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF //
// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, AND NON-INFRINGEMENT. //
// IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY //
// CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT //
// OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR //
// THE USE OR OTHER DEALINGS IN THE SOFTWARE. //
// //
///////////////////////////////////////////////////////////////////////////////

第一行替换成#!/usr/bin/perl -w

添加环境变量

echo 'PATH=$PATH:/opt/biosoft/LACHESIS/bin/' > ~/.bashrc

软件使用

实际的软件使用要求你需要提供至少两类输入文件
•HiC数据,双端fastq格式
•初始组装,fasta格式

使用bwa比对得到bam文件

samtools faidx draft.asm.fasta
bwa index -a bwtsw draft.asm.fasta
bwa aln -t 155 draft.asm.fasta reads_R1.fastq.gz > reads_R1.sai
bwa aln -t 155 draft.asm.fasta reads_R2.fastq.gz > reads_R2.sai
bwa sampe draft.asm.fasta reads_R1.sai reads_R2.sai reads_R1.fastq.gz reads_R2.fastq.gz > sample.bwa_aln.sam
PreprocessSAMs.pl sample.bwa_aln.sam draft.asm.fasta MBOI
filterBAM_forHiC.pl sample.bwa_aln.REduced.paired_only.bam sample.clean.sam
samtools view -bt draft.asm.fasta.fai sample.clean.sam > sample.clean.bam

设置配置文件

cp /opt/biosoft/LACHESIS/bin/INIs/test_case.ini sample.ini

SPECIES = test # 写实际的物种名即可
DRAFT_ASSEMBLY_FASTA = draft.asm.fasta # 待组装序列的实际位置
SAM_DIR = . #表示当前目录下查找文件
SAM_FILES = sample.clean.bam #bam文件名
RE_SITE_SEQ = AAGCTT #酶切识别序列
USE_REFERENCE = 0 #不使用参考序列
BLAST_FILE_HEAD = . # BLAST的输出结果
CLUSTER_N = 16 # 最终聚类数目

# contig中最小的酶切位点数,
CLUSTER_MIN_RE_SITES=25
# contig中最大的link密度, 也就是一个区域与多个contig存在信号
# 可能是异染色质或重复序列
CLUSTER_MAX_LINK_DENSITY=2
# 对于CLUSTER_MIN_RE_SITES过滤掉的contig在初步聚类后还有机会加入已有的分组中
# 如果它加入其中的信号是加入另一组信号的3倍
CLUSTER_NONINFORMATIVE_RATIO=3
# 允许成组的最小酶切数
ORDER_MIN_N_RES_IN_TRUNK=15

运行:

ulimit -s 10240
Lachesis sample.ini
CreateScaffoldedFasta.pl draft.asm.fasta out/test_case