Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
86 changes: 47 additions & 39 deletions oncodashkb/adapters/copy_number_alterations.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -86,13 +86,6 @@ transformers:
- citation_PM_ids
for_objects:
- alteration
- map:
columns:
- consequence
to_property:
- consequence
for_objects:
- alteration
- map:
columns:
- level_of_evidence
Expand All @@ -103,73 +96,97 @@ transformers:
metadata:
- name: copy_number_alterations



validate: # Validation for cna labeled "external.".
columns:
patient_id:
dtype: str
description: "Cohort code from clinical data"
sample_id:
dtype: str
description: "Sample id of biopsy (origin OVCA database)"
referenceGenome:
dtype: str
description: "Standard human reference genome used in WGS read alignment"
checks:
isin:
value:
- GRCh37
- GRCh38
hugoSymbol:
dtype: str
description: "Gene name in Hugo nomenclature"
checks:
str_matches: ^[a-zA-Z0-9-]+$
alteration:
dtype: str
description: "Genomic alteration type"
checks:
isin:
value:
- AMPLIFICATION
- '4' #FIXME: possible error.
# - DELETION #FIXME: not present in data declared in source description.
# - UNKNOWN #FIXME: not present in data declared in source description.
hugoSymbol:
dtype: str
checks:
str_matches: ^[a-zA-Z0-9-]+$
- DELETION
- UNKNOWN
tumorType:
dtype: str
description: "Tumor type abbreviation (OncoTree based)"
checks:
isin:
value:
- HGSOC
- '5' #FIXME: possible error.
oncogenic:
dtype: str
description: "Indication of alteration oncogenecity"
nullable: true
checks:
isin:
value:
- Inconclusive # Not present in source documentation description.
- Inconclusive
- Likely Neutral
- Likely Oncogenic
- Oncogenic
- Unknown
- known # Not present in source documentation description.
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Here known still comes out as a value present in the data.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I added "known" then.

mutationEffectDescription:
dtype: str
description: "Brief overview of the biological effect of the alteration with references"
nullable: true
gene_role:
dtype: str
description: "The biological effect of a mutation/alteration on the protein function"
nullable: true
checks:
isin:
value:
- Act # Not present in source documentation description.
- Gain-of-function
- Inconclusive
- Likely Gain-of-function
# - Likely Loss-of-function FIXME: not present in data declared in source description.
# - Likely Neutral FIXME: not present in data declared in source description.
# - Likely Switch-of-function FIXME: not present in data declared in source description.
# - Loss-of-function FIXME: not present in data declared in source description.
- LoF # Not present in source documentation description.
# - Neutral FIXME: not present in data declared in source description.
# - Switch-of-function FIXME: not present in data declared in source description.
- Likely Loss-of-function
- Likely Neutral
- Loss-of-function
- Unknown
- ambiguous # Not present in source documentation description.
- Act
- LoF
- ambiguous
citationPMids:
dtype: str
description: "Citations related to the biological effect separated by semicolon"
nullable: true
checks:
str_matches: ^[0-9]{7,}(,[0-9]{7,})*$
str_matches: ^[0-9]{7,}(;[0-9]{7,})*$
geneSummary:
dtype: str
description: "Brief overview of the gene and its role in cancer"
nullable: true
variantSummary:
dtype: str
description: "Brief description of the variant oncogenicity"
nullable: true
tumorTypeSummary:
dtype: str
description: "Describes the therapeutic implication that applies to the drug indication in discovered cancer"
nullable: true
level_of_evidence:
dtype: str
description: "The highest level of evidence for therapeutic implications based on OncoKB (CGI levels are mapped to this)"
nullable: true
checks:
isin:
Expand All @@ -180,13 +197,4 @@ validate: # Validation for cna labeled "external.".
- LEVEL_3B
- LEVEL_4
- LEVEL_R1
- LEVEL_R2
geneSummary:
dtype: str
nullable: true
variantSummary:
dtype: str
nullable: true
tumorTypeSummary:
dtype: str
nullable: true
- LEVEL_R2
125 changes: 65 additions & 60 deletions oncodashkb/adapters/copy_number_alterations_local.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -86,13 +86,6 @@ transformers:
- citation_PM_ids
for_objects:
- alteration
- map:
columns:
- consequence
to_property:
- consequence
for_objects:
- alteration
- map:
columns:
- level_of_evidence
Expand All @@ -106,96 +99,108 @@ metadata:

validate: # Validation for cna labeled "local.".
columns:
# unnamed:0: #FIXME: weird column.
# dtype: int64
patient_id:
dtype: str #FIXME: not present in data declared in source description.
dtype: str
description: "Cohort code from clinical data"
sample_id:
dtype: str
description: "Sample id of biopsy (origin OVCA database)"
referenceGenome:
dtype: str
description: "Standard human reference genome used in WGS read alignment"
checks:
isin:
- GRCh37
- GRCh38
- '2' #FIXME: possible error.
hugoSymbol:
dtype: str
description: "Gene name in Hugo nomenclature"
alteration:
dtype: str
description: "Genomic alteration type"
checks:
isin:
value:
- AMPLIFICATION
- '4' #FIXME: possible error.
# - DELETION FIXME: not present in data declared in source description.
# - UNKNOWN FIXME: not present in data declared in source description.
- DELETION
- UNKNOWN
tumorType:
dtype: str
description: "Tumor type abbreviation (OncoTree based)"
checks:
isin:
value:
- HGSOC
- '5' #FIXME: possible error.
nMajor: #FIXME: not present in data declared in source description.
dtype: float64
nMinor: #FIXME: not present in data declared in source description.
dtype: float64
lohstatus: #FIXME: not present in data declared in source description.
ploidy:
dtype: float64
description: "The number of complete sets of chromosomes in cancer cells"
nMinor:
dtype: int64
description: "Minor allele copynumber"
nMajor:
dtype: int64
description: "Major allele copynumber"
lohstatus:
dtype: str
description: "Loss of heterozygosity status"
checks:
isin:
value:
- HET
- '8' #FIXME: possible error.
ploidy: #FIXME: not present in data declared in source description.
dtype: float64
start: #FIXME: not present in data declared in source description.
dtype: float64
end: #FIXME: not present in data declared in source description.
dtype: float64
strand: #FIXME: not present in data declared in source description.
dtype: float64
- LOH
chromosome:
dtype: str
description: "Chromosome number where alteration is occurring"
start:
dtype: int64
description: "Start position of alteration in chromosome"
end:
dtype: int64
description: "End position of alteration in chromosome"
strand:
dtype: int64
description: "DNA strand"
checks:
isin:
value:
- -1.0
- 1.0
- 12.0 #FIXME: possible error.
band: #FIXME: not present in data declared in source description.
- -1
- 1
band:
dtype: str
nProbesCr: #FIXME: not present in data declared in source description.
dtype: float64
nProbesAf: #FIXME: not present in data declared in source description.
dtype: float64
logR: #FIXME: not present in data declared in source description.
description: "Chromosomal locus"
nProbesCr:
dtype: int64
description: "Number of probes"
nProbesAf:
dtype: int64
description: "Number of probes"
logR:
dtype: float64
baf: #FIXME: not present in data declared in source description.
description: "Signal intensity"
baf:
dtype: float64
nAraw: #FIXME: not present in data declared in source description.
description: "B-allele frequency of occurrence"
nAraw:
dtype: float64
nBraw: #FIXME: not present in data declared in source description.
description: "Calculated copy number of allele A"
nBraw:
dtype: float64
purifiedLogR: #FIXME: not present in data declared in source description.
description: "Calculated copy number of allele B"
purifiedLogR:
dtype: float64
purifiedBaf: #FIXME: not present in data declared in source description.
description: "Mean signal intensity"
purifiedBaf:
dtype: float64
purifiedLoh: #FIXME: not present in data declared in source description.
description: "B-allele frequency measurement"
purifiedLoh:
dtype: float64
minPurifiedLogR: #FIXME: not present in data declared in source description.
description: "LoH measurement"
minPurifiedLogR:
dtype: float64
maxPurifiedLogR: #FIXME: not present in data declared in source description.
description: "Minimum measured signal intensity"
maxPurifiedLogR:
dtype: float64
breaksInGene: #FIXME: not present in data declared in source description.
dtype: float64
checks:
isin:
value:
- 0.0
- 1.0
- 2.0
- 3.0
- 4.0
- 5.0
- 6.0
- 7.0
- 25.0 #FIXME: possible error.
description: "Maximum measured signal intensity"
breaksInGene:
dtype: int64
description: "Number of copy number level shift points in alteration"
Loading