【6.2.4】CHOPCHOP的部署
一、ChopChop
1.1 安装软件
cd /data/user/sam/project/crispr/lib
git clone https://bitbucket.org/valenlab/chopchop.git
cd chopchop
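# 安装沙箱(注意:virtualenv 20 及以上版本已移除 --no-site-packages 选项,新版本默认即为隔离环境,直接执行 virtualenv venv 即可;下面固定版本的依赖如 mysql-python 需要 Python 2.7)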
virtualenv --no-site-packages venv
# 启动沙箱
source venv/bin/activate
pip install biopython==1.76
pip install pandas
pip install scipy
pip install numpy
pip install argparse
pip install scikit-learn==0.18.1
pip install mysql-python==1.2.3
1.2 下载数据库
基因组位置:
cd /data/database/homo
wget -c https://chopchop.cbu.uib.no/genomes/hg38.1.ebwt --no-check-certificate;
wget -c --no-check-certificate https://chopchop.cbu.uib.no/genomes/hg38.2.ebwt;
wget -c --no-check-certificate https://chopchop.cbu.uib.no/genomes/hg38.2bit;
wget -c --no-check-certificate https://chopchop.cbu.uib.no/genomes/hg38.3.ebwt;
wget -c --no-check-certificate https://chopchop.cbu.uib.no/genomes/hg38.4.ebwt;
wget -c --no-check-certificate https://chopchop.cbu.uib.no/genomes/hg38.rev.1.ebwt;
wget -c --no-check-certificate https://chopchop.cbu.uib.no/genomes/hg38.rev.2.ebwt
1.3 下载基因组(这一步可以不用,因为上面已经下载数据库了)
cd /data/database/homo
wget -c http://hgdownload.soe.ucsc.edu/goldenPath/hg19/bigZips/hg19.fa.gz
gzip -d hg19.fa.gz
/data/software/tools/bowtie-1.0.1/bowtie-build -f hg19.fa hg19
wget -c http://hgdownload.soe.ucsc.edu/goldenPath/hg19/bigZips/hg19.2bit
cd /data/database/genome/hg38
wget -c http://hgdownload.soe.ucsc.edu/goldenPath/hg38/bigZips/hg38.fa.gz
1.4 gtfToGenePred
见上一篇博文
1.5 配置config
修改程序路径
vim config.json
{
"PATH": {
"PRIMER3": "/home/sam/project/crispr/lib/chopchop/primer3_core",
"BOWTIE": "/home/sam/project/crispr/lib/chopchop/bowtie/bowtie",
"TWOBITTOFA": "/home/sam/project/crispr/lib/chopchop/twoBitToFa",
"TWOBIT_INDEX_DIR": "/data/database/homo",
"BOWTIE_INDEX_DIR": "/data/database/homo",
"ISOFORMS_INDEX_DIR": "/your/full/path/to/ebwt_transcriptome_folder_and_2bit_of_genome",
"ISOFORMS_MT_DIR": "/your/full/path/to/vienna_MT_folder",
"GENE_TABLE_INDEX_DIR": "/data/database/homo/genepred"
},
"THREADS": 1
}
二、使用和测试
source /home/sam/project/crispr/lib/chopchop/venv/bin/activate
/home/sam/project/crispr/lib/chopchop/chopchop.py -G hg38 -o temp -Target chr10:1000000-1001000
/home/sam/project/crispr/lib/chopchop/chopchop.py -G hg19 -o temp -Target NM_144906
cd /data/user/sam/project/crispr/lib/chopchop/tests
/data/user/sam/project/crispr/lib/chopchop/chopchop.py -G hg38 -o TRAC -Target TRAC >trac.tsv
2.1 具体例子
cd /home/sam/project/crispr/lib/chopchop/tests
/home/sam/project/crispr/lib/chopchop/chopchop.py -G hg38 -o CD52 -Target CD52 --PAM NAG
/data/user/sam/project/crispr/lib/chopchop/chopchop.py -G hg38 -o CD52 -Target CD52 --PAM NAG
/data/user/sam/project/crispr/lib/chopchop/chopchop.py -G hg38 -o CD52 --PAM NAG --fasta -Target CD52/sequence.fa
/home/sam/project/crispr/lib/chopchop/chopchop.py -G hg38 -o CD52 -Target CD52 --PAM NGG -g 17 --scoringMethod DOENCH_2016 --consensusUnion
三、参数说明
source /home/sam/project/crispr/lib/chopchop/venv/bin/activate
/data/user/sam/project/crispr/lib/chopchop/chopchop.py -G $(species) -o $(target) -Target $(target) --target $(region) --targetDownstreamPromoter $(DownstreamPromoter) --targetUpstreamPromoter $(UpstreamPromoter) --PAM $(PAM) --filterGCmin $(filterGCmin) --filterGCmax $(filterGCmax) -filterSelfCompMax $(filterSelfCompMax) --maxMismatches $(maxMismatches) --scoringMethod $(scoringMethod) --guideSize $(guideSize) >result.txt
/data/user/sam/project/crispr/lib/chopchop/chopchop.py -G hg38 -o CD52 --PAM NAG -Target CD52 --filterGCmin 10 --filterGCmax 80 -filterSelfCompMax 0 --maxMismatches 3 --scoringMethod DOENCH_2016
--consensusUnion 加上这个参数,就意味着对多个转录本取并集(union);不加时默认为 False,即取交集(intersection)
--PAM NAG >result.tsv
说明:
1. Supports RefSeq/ENSEMBL IDs, gene names, or genomic coordinates as input.
2. "-Target", "--targets", type=str, help="Target genes or regions", required=True
"-G", "--genome", default="danRer7", metavar="GENOME", help="The genome to search."
"-T", "--MODE", default=1, type=int, choices=[1, 2, 3, 4], help="Set mode (int): default is Cas9 = 1, Talen = 2, Cpf1 = 3, Nickase = 4"
"-t", "--target", default="CODING", dest="targetRegion", help="Target the whole gene CODING/WHOLE/UTR5/UTR3/SPLICE / PROMOTER. Default is CODING.")
"-TDP", "--targetDownstreamPromoter", default=200, type=int, help="how many bp to target downstream of TSS"
"-TUP", "--targetUpstreamPromoter", default=200, type=int, help="how many bp to target upstream of TSS"
"-e", "--exon", help="Comma separated list of exon indices. Only find sites in this subset. ", metavar="EXON_NUMBER", dest="exons")
"-consensusUnion", "--consensusUnion", default=False, action="store_true", help="When calculating consensus sequence from multiple isoforms default uses intersection. This option specifies union of isoforms.")
"-filterGCmin", "--filterGCmin", default=0, type=int, help="Minimum required GC percentage. Default is 0.")
"-filterGCmax", "--filterGCmax", default=100, type=int, help="Maximum allowed GC percentage. Default is 100.")
-filterSelfCompMax FILTERSELFCOMPMAX, --filterSelfCompMax FILTERSELFCOMPMAX
Maximum acceptable Self-complementarity score. Default
is -1, no filter.
-g GUIDE_SIZE, --guideSize GUIDE_SIZE
The size of the guide RNA.
"-M", "--PAM", type=str, help="The PAM motif."
"-F", "--fasta", default=False, action="store_true", help="Use FASTA file as input rather than gene or genomic region.")
"-v", "--maxMismatches", default=3, type=int, choices=[0, 1, 2, 3], metavar="MAX_MISMATCHES", help="The number of mismatches to check across the sequence.")
"-m", "--maxOffTargets", metavar="MAX_HITS", help="The maximum number of off targets allowed."
-scoringMethod {XU_2015,DOENCH_2014,DOENCH_2016,MORENO_MATEOS_2015,CHARI_2015,G_20,KIM_2018,ALL}, --scoringMethod {XU_2015,DOENCH_2014,DOENCH_2016,MORENO_MATEOS_2015,CHARI_2015,G_20,KIM_2018,ALL}
Scoring used for Cas9 and Nickase. Default is G_20
"-repairPredictions", "--repairPredictions", default=None, type=str,
choices=['mESC', 'U2OS', 'HEK293', 'HCT116', 'K562'], help="Use inDelphi from Shen et al 2018 to predict repair profiles for every guideRNA, this will make .repProfile and .repStats files")
-isoforms, --isoforms
Search for offtargets on the transcriptome.
参考资料
这里是一个广告位,感兴趣的都可以发邮件聊聊:tiehan@sina.cn
个人公众号,比较懒,很少更新,可以在上面提问题,如果回复不及时,可发邮件给我: tiehan@sina.cn
个人公众号,比较懒,很少更新,可以在上面提问题,如果回复不及时,可发邮件给我: tiehan@sina.cn