-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathncbi_assemblyseqload.config
More file actions
187 lines (137 loc) · 5.19 KB
/
ncbi_assemblyseqload.config
File metadata and controls
187 lines (137 loc) · 5.19 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
#format: sh
#
# ncbi_assemblyseqload.config.default
#
# This file sets up environment variables that are needed for the
# NCBI Assembly Sequence and Coordinate Loads
#
# The following config settings change between loads:
# 1) INPUTDIR
# 2) SEQ_RELEASE_DATE
# 3) SEQ_RELEASE_NO
# 4) COORD_VERSION
#
# enabled by default:
# 1) load associations (ASSOC_JNUMBER holds a J number) - may want to set it to 0 during testing
#
# default is false for the following - may want to make true during testing
# 1) load sequence coordinate cache table (OK_LOAD_SEQ_CACHE_TABLES)
# 2) load marker cache tables (OK_LOAD_MRK_CACHE_TABLES)
###########################################################################
###########################################################################
#
# LOAD SPECIFIC SETTINGS
#
###########################################################################
# Directory containing the assembly input files
INPUTDIR="${DATALOADSOUTPUT}/mgi/genemodelload/output"
export INPUTDIR

# Parent directory of this load's logs, reports, output and archive directories
FILEDIR="${DATALOADSOUTPUT}/assembly/ncbiseqload"
export FILEDIR

# Working directories, all located directly under FILEDIR
LOGDIR="${FILEDIR}/logs"
export LOGDIR
RPTDIR="${FILEDIR}/reports"
export RPTDIR
OUTPUTDIR="${FILEDIR}/output"
export OUTPUTDIR
ARCHIVEDIR="${FILEDIR}/archive"
export ARCHIVEDIR

# Full path names of the process, diagnostic, curator and validation logs
LOG_PROC="${LOGDIR}/assemblyseqload.proc.log"
export LOG_PROC
LOG_DIAG="${LOGDIR}/assemblyseqload.diag.log"
export LOG_DIAG
LOG_CUR="${LOGDIR}/assemblyseqload.cur.log"
export LOG_CUR
LOG_VAL="${LOGDIR}/assemblyseqload.val.log"
export LOG_VAL

# Full path name of the ncbi input file (lives in INPUTDIR)
INFILE_NAME="${INPUTDIR}/ncbi_genemodels_load.txt"
export INFILE_NAME

# Full path name of the config file handed to assocload
CONFIG_ASSOCLOAD="${ASSEMBLYSEQLOAD}/ncbi_assocload.config"
export CONFIG_ASSOCLOAD
###########################################################################
#
# ncbi assemblyseqload settings
#
###########################################################################
# Sequence attributes
# SEQ_VIRTUAL is true when the sequence is virtual (computationally derived)
SEQ_VIRTUAL="true"
export SEQ_VIRTUAL
# NCBI sequence type
SEQ_TYPE="DNA"
export SEQ_TYPE
# NCBI sequence quality
SEQ_QUALITY="High"
export SEQ_QUALITY
# J number used for the sequence load
SEQ_JNUMBER="J:90438"
export SEQ_JNUMBER

# Association load
# Which assocload script to run
ASSOCLOADER_SH="${ASSOCLOAD}/bin/AssocLoad2.sh"
export ASSOCLOADER_SH
# J number for the accession association load;
# use '0' (zero) to skip the association load entirely
ASSOC_JNUMBER="J:90438"
#ASSOC_JNUMBER=0
export ASSOC_JNUMBER
ASSOC_OBJECTTYPE="Marker"
export ASSOC_OBJECTTYPE

# Cache table switches
# Should the sequence coordinate cache table be loaded?
OK_LOAD_SEQ_CACHE_TABLES="false"
export OK_LOAD_SEQ_CACHE_TABLES
# Should the marker cache tables be loaded? When true, they are
# loaded only if associations are loaded as well
OK_LOAD_MRK_CACHE_TABLES="false"
export OK_LOAD_MRK_CACHE_TABLES

# Source information for the sequences
SEQ_ORGANISM="mouse, laboratory"
export SEQ_ORGANISM
SEQ_STRAIN="C57BL/6J"
export SEQ_STRAIN
SEQ_TISSUE="Not Specified"
export SEQ_TISSUE
SEQ_AGE="Not Specified"
export SEQ_AGE
SEQ_GENDER="Pooled"
export SEQ_GENDER
SEQ_CELLLINE="Not Specified"
export SEQ_CELLLINE

# Release and provider information
# NCBI release date, formatted yyyy/mm/dd
SEQ_RELEASE_DATE="2024/02/07"
export SEQ_RELEASE_DATE
# Used for the sequence version
SEQ_RELEASE_NO="GRCm39"
export SEQ_RELEASE_NO
# MGI_User login value the load records in the CreatedBy and
# ModifiedBy columns of DB tables
JOBSTREAM="ncbi_assemblyseqload"
export JOBSTREAM
# SEQ_Sequence provider controlled vocabulary term
SEQ_PROVIDER="NCBI Gene Model"
export SEQ_PROVIDER
# Logical db name for this data provider
SEQ_LOGICALDB="NCBI Gene Model"
export SEQ_LOGICALDB

# Feature type, load mode and interpreter
# MGI type name of the feature, e.g. 'Sequence' or 'Marker'
COORD_FEATURE_MGITYPE="Sequence"
export COORD_FEATURE_MGITYPE
# Either add or delete_reload
COORD_LOAD_MODE="delete_reload"
export COORD_LOAD_MODE
# Interpreter class to use
SEQ_INTERPRETER="org.jax.mgi.shr.dla.input.mgs.MGSAssemblyFormatInterpreter"
export SEQ_INTERPRETER
############################################################################
#
# ncbi coordload settings
#
###########################################################################
# Java classes used to interpret and process the coordinates
COORD_INTERPRETER="org.jax.mgi.shr.dla.input.mgs.MGSCoordinateFormatInterpreter"
export COORD_INTERPRETER
COORD_PROCESSOR="org.jax.mgi.dbs.mgd.loads.Coord.ChromosomeCoordMapProcessor"
export COORD_PROCESSOR

# Logical db name for this data provider (taken from SEQ_LOGICALDB)
COORD_LOGICALDB="${SEQ_LOGICALDB}"
export COORD_LOGICALDB

# Organism whose coordinates are being loaded (taken from SEQ_ORGANISM)
COORD_ORGANISM="${SEQ_ORGANISM}"
export COORD_ORGANISM

# Version of the coordinate load
COORD_VERSION="GRCm39"
export COORD_VERSION

# Coordinate map collection this load belongs to
COORD_COLLECTION_NAME="NCBI Gene Model"
export COORD_COLLECTION_NAME

# Abbreviation for the coordinate collection;
# when no value is assigned it will be the same as COORD_COLLECTION_NAME
COORD_COLLECTION_ABBREV="NCBI"
export COORD_COLLECTION_ABBREV

# MGI type name of the feature
COORD_FEATURE_MGITYPE="Sequence"
export COORD_FEATURE_MGITYPE

# OVERRIDE of the common config - set to true for NCBI only
SEQ_REPEATS_OK="true"
export SEQ_REPEATS_OK
COORD_REPEATS_OK="true"
export COORD_REPEATS_OK
###########################################################################
#
# MISCELLANEOUS SETTINGS
#
###########################################################################
# Load name that appears in the subject of an email notification
MAIL_LOADNAME='NCBI Assembly Load'
export MAIL_LOADNAME