diff --git a/CHANGELOG.md b/CHANGELOG.md
index 3355e12..c04fe6d 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,19 @@
 # HGVS library change log
 
+## Unreleased
+  - Use same code for calculating transcript position of start/stop codons.
+  - Be able to pass in pre-calculated start/stop from transcript_json if available
+
+## 0.12.1 (2021-12-09)
+  - Fix issue #61 - Regex for reference HGVS without reference base
+
+## 0.12.0 (2021-11-24)
+  - cDNA gaps
+  - Refactor models
+  - Support non-coding, mitochondria and LRG transcripts
+  - Validate coordinate span equals reference length
+  - Scripts to generate transcripts from RefSeq and Ensembl GTFs on the web
+
 ## 0.9.2 (2014-12-11)
   - Rename package to pyhgvs to avoid naming conflict with other libraries.
 
diff --git a/pyhgvs/__init__.py b/pyhgvs/__init__.py
index b4df00c..0521dab 100644
--- a/pyhgvs/__init__.py
+++ b/pyhgvs/__init__.py
@@ -7,503 +7,16 @@
 Definition of which transcript to use coding variants:
 ftp://ftp.ncbi.nih.gov/refseq/H_sapiens/RefSeqGene/LRG_RefSeqGene
 
-
-HGVS language currently implemented.
-
-HGVS = ALLELE
-     | PREFIX_NAME : ALLELE
-
-PREFIX_NAME = TRANSCRIPT
-            | TRANSCRIPT '(' GENE ')'
-
-TRANSCRIPT = TRANSCRIPT_NAME
-           | TRANSCRIPT_NAME '.' TRANSCRIPT_VERSION
-
-TRANSCRIPT_VERSION = NUMBER
-
-ALLELE = 'c.' CDNA_ALLELE    # cDNA
-       | 'g.' GENOMIC_ALLELE # genomic
-       | 'm.' MIT_ALLELE     # mitochondrial sequence
-       | 'n.' NC_ALLELE      # non-coding RNA reference sequence
-       | 'r.' RNA_ALLELE     # RNA sequence (like r.76a>u)
-       | 'p.' PROTEIN_ALLELE # protein sequence (like  p.Lys76Asn)
-
-NC_ALLELE =
-RNA_ALLELE =
-CDNA_ALLELE = CDNA_COORD SINGLE_BASE_CHANGE
-            | CDNA_COORD_RANGE MULTI_BASE_CHANGE
-
-GENOMIC_ALLELE =
-MIT_ALLELE = COORD SINGLE_BASE_CHANGE
-           | COORD_RANGE MULTI_BASE_CHANGE
-
-SINGLE_BASE_CHANGE = CDNA_ALLELE = CDNA_COORD BASE '='        # no change
-                   | CDNA_COORD BASE '>' BASE                 # substitution
-                   | CDNA_COORD 'ins' BASE                    # 1bp insertion
-                   | CDNA_COORD 'del' BASE                    # 1bp deletion
-                   | CDNA_COORD 'dup' BASE                    # 1bp duplication
-                   | CDNA_COORD 'ins'                         # 1bp insertion
-                   | CDNA_COORD 'del'                         # 1bp deletion
-                   | CDNA_COORD 'dup'                         # 1bp duplication
-                   | CDNA_COORD 'del' BASE 'ins' BASE         # 1bp indel
-                   | CDNA_COORD 'delins' BASE                 # 1bp indel
-
-MULTI_BASE_CHANGE = COORD_RANGE 'del' BASES             # deletion
-                  | COORD_RANGE 'ins' BASES             # insertion
-                  | COORD_RANGE 'dup' BASES             # duplication
-                  | COORD_RANGE 'del'                   # deletion
-                  | COORD_RANGE 'dup'                   # duplication
-                  | COORD_RANGE 'del' BASES 'ins' BASES # indel
-                  | COORD_RANGE 'delins' BASES          # indel
-
-
-AMINO1 = [GAVLIMFWPSTCYNQDEKRH]
-
-AMINO3 = 'Gly' | 'Ala' | 'Val' | 'Leu' | 'Ile' | 'Met' | 'Phe' | 'Trp' | 'Pro'
-       | 'Ser' | 'Thr' | 'Cys' | 'Tyr' | 'Asn' | 'Gln' | 'Asp' | 'Glu' | 'Lys'
-       | 'Arg' | 'His'
-
-PROTEIN_ALLELE = AMINO3 COORD '='               # no peptide change
-               | AMINO1 COORD '='               # no peptide change
-               | AMINO3 COORD AMINO3 PEP_EXTRA  # peptide change
-               | AMINO1 COORD AMINO1 PEP_EXTRA  # peptide change
-               | AMINO3 COORD '_' AMINO3 COORD PEP_EXTRA        # indel
-               | AMINO1 COORD '_' AMINO1 COORD PEP_EXTRA        # indel
-               | AMINO3 COORD '_' AMINO3 COORD PEP_EXTRA AMINO3 # indel
-               | AMINO1 COORD '_' AMINO1 COORD PEP_EXTRA AMINO1 # indel
-
-# A genomic range:
-COORD_RANGE = COORD '_' COORD
-
-# A cDNA range:
-CDNA_COORD_RANGE = CDNA_COORD '_' CDNA_COORD
-
-# A cDNA coordinate:
-CDNA_COORD = COORD_PREFIX COORD
-           | COORD_PREFIX COORD OFFSET_PREFIX OFFSET
-COORD_PREFIX = '' | '-' | '*'
-COORD = NUMBER
-OFFSET_PREFIX = '-' | '+'
-OFFSET = NUMBER
-
-# Primatives:
-NUMBER = \d+
-BASE = [ACGT]
-BASES = BASE+
-
 """
 from __future__ import absolute_import
 from __future__ import unicode_literals
 
 import re
 
-from .variants import justify_indel
-from .variants import normalize_variant
-from .variants import revcomp
-
-
-CHROM_PREFIX = 'chr'
-CDNA_START_CODON = 'cdna_start'
-CDNA_STOP_CODON = 'cdna_stop'
-
-
-class HGVSRegex(object):
-    """
-    All regular expression for HGVS names.
-    """
-
-    # DNA syntax
-    # http://www.hgvs.org/mutnomen/standards.html#nucleotide
-    BASE = "[acgtbdhkmnrsvwyACGTBDHKMNRSVWY]|\d+"
-    BASES = "[acgtbdhkmnrsvwyACGTBDHKMNRSVWY]+|\d+"
-    DNA_REF = "(?P<ref>" + BASES + ")"
-    DNA_ALT = "(?P<alt>" + BASES + ")"
-
-    # Mutation types
-    EQUAL = "(?P<mutation_type>=)"
-    SUB = "(?P<mutation_type>>)"
-    INS = "(?P<mutation_type>ins)"
-    DEL = "(?P<mutation_type>del)"
-    DUP = "(?P<mutation_type>dup)"
-
-    # Simple coordinate syntax
-    COORD_START = "(?P<start>\d+)"
-    COORD_END = "(?P<end>\d+)"
-    COORD_RANGE = COORD_START + "_" + COORD_END
-
-    # cDNA coordinate syntax
-    CDNA_COORD = ("(?P<coord_prefix>|-|\*)(?P<coord>\d+)"
-                  "((?P<offset_prefix>-|\+)(?P<offset>\d+))?")
-    CDNA_START = ("(?P<start>(?P<start_coord_prefix>|-|\*)(?P<start_coord>\d+)"
-                  "((?P<start_offset_prefix>-|\+)(?P<start_offset>\d+))?)")
-    CDNA_END = (r"(?P<end>(?P<end_coord_prefix>|-|\*)(?P<end_coord>\d+)"
-                "((?P<end_offset_prefix>-|\+)(?P<end_offset>\d+))?)")
-    CDNA_RANGE = CDNA_START + "_" + CDNA_END
-
-    # cDNA allele syntax
-    CDNA_ALLELE = [
-        # No change
-        CDNA_START + DNA_REF + EQUAL,
-
-        # Substitution
-        CDNA_START + DNA_REF + SUB + DNA_ALT,
-
-        # 1bp insertion, deletion, duplication
-        CDNA_START + INS + DNA_ALT,
-        CDNA_START + DEL + DNA_REF,
-        CDNA_START + DUP + DNA_REF,
-        CDNA_START + DEL,
-        CDNA_START + DUP,
-
-        # Insertion, deletion, duplication
-        CDNA_RANGE + INS + DNA_ALT,
-        CDNA_RANGE + DEL + DNA_REF,
-        CDNA_RANGE + DUP + DNA_REF,
-        CDNA_RANGE + DEL,
-        CDNA_RANGE + DUP,
-
-        # Indels
-        "(?P<delins>" + CDNA_START + 'del' + DNA_REF + 'ins' + DNA_ALT + ")",
-        "(?P<delins>" + CDNA_RANGE + 'del' + DNA_REF + 'ins' + DNA_ALT + ")",
-        "(?P<delins>" + CDNA_START + 'delins' + DNA_ALT + ")",
-        "(?P<delins>" + CDNA_RANGE + 'delins' + DNA_ALT + ")",
-    ]
-
-    CDNA_ALLELE_REGEXES = [re.compile("^" + regex + "$")
-                           for regex in CDNA_ALLELE]
-
-    # Peptide syntax
-    PEP = "([A-Z]([a-z]{2}))+"
-    PEP_REF = "(?P<ref>" + PEP + ")"
-    PEP_REF2 = "(?P<ref2>" + PEP + ")"
-    PEP_ALT = "(?P<alt>" + PEP + ")"
-
-    PEP_EXTRA = "(?P<extra>(|=|\?)(|fs))"
-
-    # Peptide allele syntax
-    PEP_ALLELE = [
-        # No peptide change
-        # Example: Glu1161=
-        PEP_REF + COORD_START + PEP_EXTRA,
-
-        # Peptide change
-        # Example: Glu1161Ser
-        PEP_REF + COORD_START + PEP_ALT + PEP_EXTRA,
-
-        # Peptide indel
-        # Example: Glu1161_Ser1164?fs
-        "(?P<delins>" + PEP_REF + COORD_START + "_" + PEP_REF2 + COORD_END +
-        PEP_EXTRA + ")",
-        "(?P<delins>" + PEP_REF + COORD_START + "_" + PEP_REF2 + COORD_END +
-        PEP_ALT + PEP_EXTRA + ")",
-    ]
-
-    PEP_ALLELE_REGEXES = [re.compile("^" + regex + "$")
-                          for regex in PEP_ALLELE]
-
-    # Genomic allele syntax
-    GENOMIC_ALLELE = [
-        # No change
-        COORD_START + DNA_REF + EQUAL,
-
-        # Substitution
-        COORD_START + DNA_REF + SUB + DNA_ALT,
-
-        # 1bp insertion, deletion, duplication
-        COORD_START + INS + DNA_ALT,
-        COORD_START + DEL + DNA_REF,
-        COORD_START + DUP + DNA_REF,
-        COORD_START + DEL,
-        COORD_START + DUP,
-
-        # Insertion, deletion, duplication
-        COORD_RANGE + INS + DNA_ALT,
-        COORD_RANGE + DEL + DNA_REF,
-        COORD_RANGE + DUP + DNA_REF,
-        COORD_RANGE + DEL,
-        COORD_RANGE + DUP,
-
-        # Indels
-        "(?P<delins>" + COORD_START + 'del' + DNA_REF + 'ins' + DNA_ALT + ")",
-        "(?P<delins>" + COORD_RANGE + 'del' + DNA_REF + 'ins' + DNA_ALT + ")",
-        "(?P<delins>" + COORD_START + 'delins' + DNA_ALT + ")",
-        "(?P<delins>" + COORD_RANGE + 'delins' + DNA_ALT + ")",
-    ]
-
-    GENOMIC_ALLELE_REGEXES = [re.compile("^" + regex + "$")
-                              for regex in GENOMIC_ALLELE]
-
-
-class ChromosomeSubset(object):
-    """
-    Allow direct access to a subset of the chromosome.
-    """
-
-    def __init__(self, name, genome=None):
-        self.name = name
-        self.genome = genome
-
-    def __getitem__(self, key):
-        """Return sequence from region [start, end)
-
-        Coordinates are 0-based, end-exclusive."""
-        if isinstance(key, slice):
-            start, end = (key.start, key.stop)
-            start -= self.genome.start
-            end -= self.genome.start
-            return self.genome.genome[self.genome.seqid][start:end]
-        else:
-            raise TypeError('Expected a slice object but '
-                            'received a {0}.'.format(type(key)))
-
-    def __repr__(self):
-        return 'ChromosomeSubset("%s")' % self.name
-
-
-class GenomeSubset(object):
-    """
-    Allow the direct access of a subset of the genome.
-    """
-
-    def __init__(self, genome, chrom, start, end, seqid):
-        self.genome = genome
-        self.chrom = chrom
-        self.start = start
-        self.end = end
-        self.seqid = seqid
-        self._chroms = {}
-
-    def __getitem__(self, chrom):
-        """Return a chromosome by its name."""
-        if chrom in self._chroms:
-            return self._chroms[chrom]
-        else:
-            chromosome = ChromosomeSubset(chrom, self)
-            self._chroms[chrom] = chromosome
-            return chromosome
-
-
-class CDNACoord(object):
-    """
-    A HGVS cDNA-based coordinate.
-
-    A cDNA coordinate can take one of these forms:
-
-    N = nucleotide N in protein coding sequence (e.g. 11A>G)
-
-    -N = nucleotide N 5' of the ATG translation initiation codon (e.g. -4A>G)
-         NOTE: so located in the 5'UTR or 5' of the transcription initiation
-         site (upstream of the gene, incl. promoter)
-
-    *N = nucleotide N 3' of the translation stop codon (e.g. *6A>G)
-         NOTE: so located in the 3'UTR or 3' of the polyA-addition site
-         (including downstream of the gene)
-
-    N+M = nucleotide M in the intron after (3' of) position N in the coding DNA
-          reference sequence (e.g. 30+4A>G)
-
-    N-M = nucleotide M in the intron before (5' of) position N in the coding
-          DNA reference sequence (e.g. 301-2A>G)
-
-    -N+M / -N-M = nucleotide in an intron in the 5'UTR (e.g. -45+4A>G)
-
-    *N+M / *N-M = nucleotide in an intron in the 3'UTR (e.g. *212-2A>G)
-    """
-
-    def __init__(self, coord=0, offset=0, landmark=CDNA_START_CODON,
-                 string=''):
-        """
-        coord: main coordinate along cDNA on the same strand as the transcript
-
-        offset: an additional genomic offset from the main coordinate.  This
-                allows referencing non-coding (e.g. intronic) positions.
-                Offset is also interpreted on the coding strand.
-
-        landmark: ('cdna_start', 'cdna_stop') indicating that 'coord'
-                  is relative to one of these landmarks.
-
-        string: a coordinate from an HGVS name.  If given coord, offset, and
-                landmark should not be specified.
-        """
-
-        if string:
-            if coord != 0 or offset != 0 or landmark != CDNA_START_CODON:
-                raise ValueError("coord, offset, and landmark should not "
-                                 "be given with string argument")
-
-            self.parse(string)
-        else:
-            self.coord = coord
-            self.offset = offset
-            self.landmark = landmark
-
-    def parse(self, coord_text):
-        """
-        Parse a HGVS formatted cDNA coordinate.
-        """
-
-        match = re.match(r"(|-|\*)(\d+)((-|\+)(\d+))?", coord_text)
-        if not match:
-            raise ValueError("unknown coordinate format '%s'" % coord_text)
-        coord_prefix, coord, _, offset_prefix, offset = match.groups()
-
-        self.coord = int(coord)
-        self.offset = int(offset) if offset else 0
-
-        if offset_prefix == '-':
-            self.offset *= -1
-        elif offset_prefix == '+' or offset is None:
-            pass
-        else:
-            raise ValueError("unknown offset_prefix '%s'" % offset_prefix)
-
-        if coord_prefix == '':
-            self.landmark = CDNA_START_CODON
-        elif coord_prefix == "-":
-            self.coord *= -1
-            self.landmark = CDNA_START_CODON
-        elif coord_prefix == '*':
-            self.landmark = CDNA_STOP_CODON
-        else:
-            raise ValueError("unknown coord_prefix '%s'" % coord_prefix)
-        return self
-
-    def __str__(self):
-        """
-        Return a formatted cDNA coordinate
-        """
-        if self.landmark == CDNA_STOP_CODON:
-            coord_prefix = '*'
-        else:
-            coord_prefix = ''
-
-        if self.offset < 0:
-            offset = '%d' % self.offset
-        elif self.offset > 0:
-            offset = '+%d' % self.offset
-        else:
-            offset = ''
-
-        return '%s%d%s' % (coord_prefix, self.coord, offset)
-
-    def __eq__(self, other):
-        """Equality operator."""
-        return ((self.coord, self.offset, self.landmark) ==
-                (other.coord, other.offset, other.landmark))
-
-    def __repr__(self):
-        """
-        Returns a string representation of a cDNA coordinate.
-        """
-        if self.landmark != CDNA_START_CODON:
-            return "CDNACoord(%d, %d, '%s')" % (
-                self.coord, self.offset, self.landmark)
-        else:
-            return "CDNACoord(%d, %d)" % (self.coord, self.offset)
-
-
-# The RefSeq standard for naming contigs/transcripts/proteins:
-# http://www.ncbi.nlm.nih.gov/books/NBK21091/table/ch18.T.refseq_accession_numbers_and_mole/?report=objectonly  # nopep8
-REFSEQ_PREFIXES = [
-    ('AC_', 'genomic',
-     'Complete genomic molecule, usually alternate assembly'),
-    ('NC_', 'genomic',
-     'Complete genomic molecule, usually reference assembly'),
-    ('NG_', 'genomic', 'Incomplete genomic region'),
-    ('NT_', 'genomic', 'Contig or scaffold, clone-based or WGS'),
-    ('NW_', 'genomic', 'Contig or scaffold, primarily WGS'),
-    ('NS_', 'genomic', 'Environmental sequence'),
-    ('NZ_', 'genomic', 'Unfinished WGS'),
-    ('NM_', 'mRNA', ''),
-    ('NR_', 'RNA', ''),
-    ('XM_', 'mRNA', 'Predicted model'),
-    ('XR_', 'RNA', 'Predicted model'),
-    ('AP_', 'Protein', 'Annotated on AC_ alternate assembly'),
-    ('NP_', 'Protein', 'Associated with an NM_ or NC_ accession'),
-    ('YP_', 'Protein', ''),
-    ('XP_', 'Protein', 'Predicted model, associated with an XM_ accession'),
-    ('ZP_', 'Protein', 'Predicted model, annotated on NZ_ genomic records'),
-]
-
-REFSEQ_PREFIX_LOOKUP = dict(
-    (prefix, (kind, description))
-    for prefix, kind, description in REFSEQ_PREFIXES
-)
-
-
-def get_refseq_type(name):
-    """
-    Return the RefSeq type for a refseq name.
-    """
-    prefix = name[:3]
-    return REFSEQ_PREFIX_LOOKUP.get(prefix, (None, ''))[0]
-
-
-def get_exons(transcript):
-    """Yield exons in coding order."""
-    transcript_strand = transcript.tx_position.is_forward_strand
-    if hasattr(transcript.exons, 'select_related'):
-        exons = list(transcript.exons.select_related('tx_position'))
-    else:
-        exons = list(transcript.exons)
-    exons.sort(key=lambda exon: exon.tx_position.chrom_start)
-    if not transcript_strand:
-        exons.reverse()
-    return exons
-
-
-def get_coding_exons(transcript):
-    """Yield non-empty coding exonic regions in coding order."""
-    for exon in get_exons(transcript):
-        region = exon.get_as_interval(coding_only=True)
-        if region:
-            yield region
-
-
-def get_utr5p_size(transcript):
-    """Return the size of the 5prime UTR of a transcript."""
-
-    transcript_strand = transcript.tx_position.is_forward_strand
-    exons = get_exons(transcript)
-
-    # Find the exon containing the start codon.
-    start_codon = (transcript.cds_position.chrom_start if transcript_strand
-                   else transcript.cds_position.chrom_stop - 1)
-    cdna_len = 0
-    for exon in exons:
-        exon_start = exon.tx_position.chrom_start
-        exon_end = exon.tx_position.chrom_stop
-        if exon_start <= start_codon < exon_end:
-            break
-        cdna_len += exon_end - exon_start
-    else:
-        raise ValueError("transcript contains no exons")
-
-    if transcript_strand:
-        return cdna_len + (start_codon - exon_start)
-    else:
-        return cdna_len + (exon_end - start_codon - 1)
-
-
-def find_stop_codon(exons, cds_position):
-    """Return the position along the cDNA of the base after the stop codon."""
-    if cds_position.is_forward_strand:
-        stop_pos = cds_position.chrom_stop
-    else:
-        stop_pos = cds_position.chrom_start
-    cdna_pos = 0
-    for exon in exons:
-        exon_start = exon.tx_position.chrom_start
-        exon_stop = exon.tx_position.chrom_stop
-
-        if exon_start <= stop_pos <= exon_stop:
-            if cds_position.is_forward_strand:
-                return cdna_pos + stop_pos - exon_start
-            else:
-                return cdna_pos + exon_stop - stop_pos
-        else:
-            cdna_pos += exon_stop - exon_start
-    raise ValueError('Stop codon is not in any of the exons')
+from .models.cdna import CDNACoord, CDNA_START_CODON, CDNA_STOP_CODON
+from .models.genome import GenomeSubset
+from .models.hgvs_name import HGVSName, InvalidHGVSName
+from .models.variants import justify_indel, normalize_variant, revcomp
 
 
 def get_genomic_sequence(genome, chrom, start, end):
@@ -518,144 +31,9 @@ def get_genomic_sequence(genome, chrom, start, end):
         return str(genome[str(chrom)][start - 1:end]).upper()
 
 
-def cdna_to_genomic_coord(transcript, coord):
-    """Convert a HGVS cDNA coordinate to a genomic coordinate."""
-    transcript_strand = transcript.tx_position.is_forward_strand
-    exons = get_exons(transcript)
-    utr5p = (get_utr5p_size(transcript)
-             if transcript.is_coding else 0)
-
-    # compute starting position along spliced transcript.
-    if coord.landmark == CDNA_START_CODON:
-        if coord.coord > 0:
-            pos = utr5p + coord.coord
-        else:
-            pos = utr5p + coord.coord + 1
-    elif coord.landmark == CDNA_STOP_CODON:
-        if coord.coord < 0:
-            raise ValueError('CDNACoord cannot have a negative coord and '
-                             'landmark CDNA_STOP_CODON')
-        pos = find_stop_codon(exons, transcript.cds_position) + coord.coord
-    else:
-        raise ValueError('unknown CDNACoord landmark "%s"' % coord.landmark)
-
-    # 5' flanking sequence.
-    if pos < 1:
-        if transcript_strand:
-            return transcript.tx_position.chrom_start + pos
-        else:
-            return transcript.tx_position.chrom_stop - pos + 1
-
-    # Walk along transcript until we find an exon that contains pos.
-    cdna_start = 1
-    cdna_end = 1
-    for exon in exons:
-        exon_start = exon.tx_position.chrom_start + 1
-        exon_end = exon.tx_position.chrom_stop
-        cdna_end = cdna_start + (exon_end - exon_start)
-        if cdna_start <= pos <= cdna_end:
-            break
-        cdna_start = cdna_end + 1
-    else:
-        # 3' flanking sequence
-        if transcript_strand:
-            return transcript.cds_position.chrom_stop + coord.coord
-        else:
-            return transcript.cds_position.chrom_start + 1 - coord.coord
-
-    # Compute genomic coordinate using offset.
-    if transcript_strand:
-        # Plus strand.
-        return exon_start + (pos - cdna_start) + coord.offset
-    else:
-        # Minus strand.
-        return exon_end - (pos - cdna_start) - coord.offset
-
-
-def genomic_to_cdna_coord(transcript, genomic_coord):
-    """Convert a genomic coordinate to a cDNA coordinate and offset.
-    """
-    exons = [exon.get_as_interval()
-             for exon in get_exons(transcript)]
-
-    if len(exons) == 0:
-        return None
-
-    strand = transcript.strand
-
-    if strand == "+":
-        exons.sort(key=lambda exon: exon.chrom_start)
-    else:
-        exons.sort(key=lambda exon: -exon.chrom_end)
-
-    distances = [exon.distance(genomic_coord)
-                 for exon in exons]
-    min_distance_to_exon = min(map(abs, distances))
-
-    coding_offset = 0
-    for exon in exons:
-        exon_length = exon.chrom_end - exon.chrom_start
-        distance = exon.distance(genomic_coord)
-        if abs(distance) == min_distance_to_exon:
-            if strand == "+":
-                exon_start_cds_offset = coding_offset + 1
-                exon_end_cds_offset = coding_offset + exon_length
-            else:
-                exon_start_cds_offset = coding_offset + exon_length
-                exon_end_cds_offset = coding_offset + 1
-            # This is the exon we want to annotate against.
-            if distance == 0:
-                # Inside the exon.
-                if strand == "+":
-                    coord = (exon_start_cds_offset +
-                             (genomic_coord -
-                              (exon.chrom_start + 1)))
-                else:
-                    coord = (exon_end_cds_offset +
-                             (exon.chrom_end -
-                              genomic_coord))
-                cdna_coord = CDNACoord(coord, 0)
-            else:
-                # Outside the exon.
-                if distance > 0:
-                    nearest_exonic = exon_start_cds_offset
-                else:
-                    nearest_exonic = exon_end_cds_offset
-                if strand == "+":
-                    distance *= -1
-
-                # If outside transcript, don't use offset.
-                if (genomic_coord < transcript.tx_position.chrom_start + 1 or
-                        genomic_coord > transcript.tx_position.chrom_stop):
-                    nearest_exonic += distance
-                    distance = 0
-                cdna_coord = CDNACoord(nearest_exonic, distance)
-            break
-        coding_offset += exon_length
-
-    # Adjust coordinates for coding transcript.
-    if transcript.is_coding:
-        # Detect if position before start codon.
-        utr5p = get_utr5p_size(transcript) if transcript.is_coding else 0
-        cdna_coord.coord -= utr5p
-        if cdna_coord.coord <= 0:
-            cdna_coord.coord -= 1
-        else:
-            # Detect if position is after stop_codon.
-            exons = get_exons(transcript)
-            stop_codon = find_stop_codon(exons, transcript.cds_position)
-            stop_codon -= utr5p
-            if (cdna_coord.coord > stop_codon or
-                    cdna_coord.coord == stop_codon and cdna_coord.offset > 0):
-                cdna_coord.coord -= stop_codon
-                cdna_coord.landmark = CDNA_STOP_CODON
-
-    return cdna_coord
-
-
 def get_allele(hgvs, genome, transcript=None):
     """Get an allele from a HGVSName, a genome, and a transcript."""
-    chrom, start, end = hgvs.get_coords(transcript)
+    chrom, start, end = hgvs.get_ref_coords(transcript)
     _, alt = hgvs.get_ref_alt(
         transcript.tx_position.is_forward_strand if transcript else True)
     ref = get_genomic_sequence(genome, chrom, start, end)
@@ -672,553 +50,40 @@ def get_vcf_allele(hgvs, genome, transcript=None):
         transcript.tx_position.is_forward_strand if transcript else True)
     ref = get_genomic_sequence(genome, chrom, start, end)
 
+    # Sometimes we need to retrieve alt from reference
+    # Eg NC_000001.11:g.169549811=
+    if hgvs.mutation_type == "=":
+        alt = ref
+
     if hgvs.mutation_type in _indel_mutation_types:
+        if hgvs.mutation_type == 'dup':
+            # No alt supplied: NM_000492.3:c.1155_1156dup
+            # Number used:     NM_004119.2(FLT3):c.1794_1811dup18
+            # We *know* what the sequence is for "dup18", but not for "ins18"
+            if not hgvs.alt_allele or re.match("^N+$", hgvs.alt_allele):
+                alt = get_alt_from_sequence(hgvs, genome, transcript)
+
         # Left-pad alternate allele.
         alt = ref[0] + alt
     return chrom, start, end, ref, alt
 
 
+def get_alt_from_sequence(hgvs, genome, transcript):
+    """ returns ready for VCF """
+
+    chrom, start, end = hgvs.get_raw_coords(transcript)
+    return get_genomic_sequence(genome, chrom, start, end)
+
+
 def matches_ref_allele(hgvs, genome, transcript=None):
     """Return True if reference allele matches genomic sequence."""
-    ref, alt = hgvs.get_ref_alt(
-        transcript.tx_position.is_forward_strand if transcript else True)
-    chrom, start, end = hgvs.get_coords(transcript)
+    is_forward_strand = transcript.tx_position.is_forward_strand if transcript else True
+    ref, _ = hgvs.get_ref_alt(is_forward_strand, raw_dup_alleles=True)  # get raw values so dup isn't always True
+    chrom, start, end = hgvs.get_raw_coords(transcript)
     genome_ref = get_genomic_sequence(genome, chrom, start, end)
     return genome_ref == ref
 
 
-class InvalidHGVSName(ValueError):
-    def __init__(self, name='', part='name', reason=''):
-        if name:
-            message = 'Invalid HGVS %s "%s"' % (part, name)
-        else:
-            message = 'Invalid HGVS %s' % part
-        if reason:
-            message += ': ' + reason
-        super(InvalidHGVSName, self).__init__(message)
-
-        self.name = name
-        self.part = part
-        self.reason = reason
-
-
-class HGVSName(object):
-    """
-    Represents a HGVS variant name.
-    """
-
-    def __init__(self, name='', prefix='', chrom='', transcript='', gene='',
-                 kind='', mutation_type=None, start=0, end=0, ref_allele='',
-                 ref2_allele='', alt_allele='',
-                 cdna_start=None, cdna_end=None, pep_extra=''):
-
-        # Full HGVS name.
-        self.name = name
-
-        # Name parts.
-        self.prefix = prefix
-        self.chrom = chrom
-        self.transcript = transcript
-        self.gene = gene
-        self.kind = kind
-        self.mutation_type = mutation_type
-        self.start = start
-        self.end = end
-        self.ref_allele = ref_allele    # reference allele
-        self.ref2_allele = ref2_allele  # reference allele at end of pep indel
-        self.alt_allele = alt_allele    # alternate allele
-
-        # cDNA-specific fields
-        self.cdna_start = cdna_start if cdna_start else CDNACoord()
-        self.cdna_end = cdna_end if cdna_end else CDNACoord()
-
-        # Protein-specific fields
-        self.pep_extra = pep_extra
-
-        if name:
-            self.parse(name)
-
-    def parse(self, name):
-        """Parse a HGVS name."""
-        # Does HGVS name have transcript/gene prefix?
-        if ':' in name:
-            prefix, allele = name.split(':', 1)
-        else:
-            prefix = ''
-            allele = name
-
-        self.name = name
-
-        # Parse prefix and allele.
-        self.parse_allele(allele)
-        self.parse_prefix(prefix, self.kind)
-        self._validate()
-
-    def parse_prefix(self, prefix, kind):
-        """
-        Parse a HGVS prefix (gene/transcript/chromosome).
-
-        Some examples of full hgvs names with transcript include:
-          NM_007294.3:c.2207A>C
-          NM_007294.3(BRCA1):c.2207A>C
-          BRCA1{NM_007294.3}:c.2207A>C
-        """
-
-        self.prefix = prefix
-
-        # No prefix.
-        if prefix == '':
-            self.chrom = ''
-            self.transcript = ''
-            self.gene = ''
-            return
-
-        # Transcript and gene given with parens.
-        # example: NM_007294.3(BRCA1):c.2207A>C
-        match = re.match("^(?P<transcript>[^(]+)\((?P<gene>[^)]+)\)$", prefix)
-        if match:
-            self.transcript = match.group('transcript')
-            self.gene = match.group('gene')
-            return
-
-        # Transcript and gene given with braces.
-        # example: BRCA1{NM_007294.3}:c.2207A>C
-        match = re.match("^(?P<gene>[^{]+)\{(?P<transcript>[^}]+)\}$", prefix)
-        if match:
-            self.transcript = match.group('transcript')
-            self.gene = match.group('gene')
-            return
-
-        # Determine using Ensembl type.
-        if prefix.startswith('ENST'):
-            self.transcript = prefix
-            return
-
-        # Determine using refseq type.
-        refseq_type = get_refseq_type(prefix)
-        if refseq_type in ('mRNA', 'RNA'):
-            self.transcript = prefix
-            return
-
-        # Determine using refseq type.
-        if prefix.startswith(CHROM_PREFIX) or refseq_type == 'genomic':
-            self.chrom = prefix
-            return
-
-        # Assume gene name.
-        self.gene = prefix
-
-    def parse_allele(self, allele):
-        """
-        Parse a HGVS allele description.
-
-        Some examples include:
-          cDNA substitution: c.101A>C,
-          cDNA indel: c.3428delCinsTA, c.1000_1003delATG, c.1000_1001insATG
-          No protein change: p.Glu1161=
-          Protein change: p.Glu1161Ser
-          Protein frameshift: p.Glu1161_Ser1164?fs
-          Genomic substitution: g.1000100A>T
-          Genomic indel: g.1000100_1000102delATG
-        """
-        if '.' not in allele:
-            raise InvalidHGVSName(allele, 'allele',
-                                  'expected kind "c.", "p.", "g.", etc')
-
-        # Determine HGVS name kind.
-        kind, details = allele.split('.', 1)
-        self.kind = kind
-        self.mutation_type = None
-
-        if kind == "c":
-            self.parse_cdna(details)
-        elif kind == "p":
-            self.parse_protein(details)
-        elif kind == "g":
-            self.parse_genome(details)
-        else:
-            raise NotImplementedError("unknown kind: %s" % allele)
-
-    def parse_cdna(self, details):
-        """
-        Parse a HGVS cDNA name.
-
-        Some examples include:
-          Substitution: 101A>C,
-          Indel: 3428delCinsTA, 1000_1003delATG, 1000_1001insATG
-        """
-        for regex in HGVSRegex.CDNA_ALLELE_REGEXES:
-            match = re.match(regex, details)
-            if match:
-                groups = match.groupdict()
-
-                # Parse mutation type.
-                if groups.get('delins'):
-                    self.mutation_type = 'delins'
-                else:
-                    self.mutation_type = groups['mutation_type']
-
-                # Parse coordinates.
-                self.cdna_start = CDNACoord(string=groups.get('start'))
-                if groups.get('end'):
-                    self.cdna_end = CDNACoord(string=groups.get('end'))
-                else:
-                    self.cdna_end = CDNACoord(string=groups.get('start'))
-
-                # Parse alleles.
-                self.ref_allele = groups.get('ref', '')
-                self.alt_allele = groups.get('alt', '')
-
-                # Convert numerical allelles.
-                if self.ref_allele.isdigit():
-                    self.ref_allele = "N" * int(self.ref_allele)
-                if self.alt_allele.isdigit():
-                    self.alt_allele = "N" * int(self.alt_allele)
-
-                # Convert duplication alleles.
-                if self.mutation_type == "dup":
-                    self.alt_allele = self.ref_allele * 2
-
-                # Convert no match alleles.
-                if self.mutation_type == "=":
-                    self.alt_allele = self.ref_allele
-                return
-
-        raise InvalidHGVSName(details, 'cDNA allele')
-
-    def parse_protein(self, details):
-        """
-        Parse a HGVS protein name.
-
-        Some examples include:
-          No change: Glu1161=
-          Change: Glu1161Ser
-          Frameshift: Glu1161_Ser1164?fs
-        """
-        for regex in HGVSRegex.PEP_ALLELE_REGEXES:
-            match = re.match(regex, details)
-            if match:
-                groups = match.groupdict()
-
-                # Parse mutation type.
-                if groups.get('delins'):
-                    self.mutation_type = 'delins'
-                else:
-                    self.mutation_type = '>'
-
-                # Parse coordinates.
-                self.start = int(groups.get('start'))
-                if groups.get('end'):
-                    self.end = int(groups.get('end'))
-                else:
-                    self.end = self.start
-
-                # Parse alleles.
-                self.ref_allele = groups.get('ref', '')
-                if groups.get('ref2'):
-                    self.ref2_allele = groups.get('ref2')
-                    self.alt_allele = groups.get('alt', '')
-                else:
-                    # If alt is not given, assume matching with ref
-                    self.ref2_allele = self.ref_allele
-                    self.alt_allele = groups.get(
-                        'alt', self.ref_allele)
-
-                self.pep_extra = groups.get('extra')
-                return
-
-        raise InvalidHGVSName(details, 'protein allele')
-
-    def parse_genome(self, details):
-        """
-        Parse a HGVS genomic name.
-
-        Som examples include:
-          Substitution: 1000100A>T
-          Indel: 1000100_1000102delATG
-        """
-
-        for regex in HGVSRegex.GENOMIC_ALLELE_REGEXES:
-            match = re.match(regex, details)
-            if match:
-                groups = match.groupdict()
-
-                # Parse mutation type.
-                if groups.get('delins'):
-                    self.mutation_type = 'delins'
-                else:
-                    self.mutation_type = groups['mutation_type']
-
-                # Parse coordinates.
-                self.start = int(groups.get('start'))
-                if groups.get('end'):
-                    self.end = int(groups.get('end'))
-                else:
-                    self.end = self.start
-
-                # Parse alleles.
-                self.ref_allele = groups.get('ref', '')
-                self.alt_allele = groups.get('alt', '')
-
-                # Convert numerical allelles.
-                if self.ref_allele.isdigit():
-                    self.ref_allele = "N" * int(self.ref_allele)
-                if self.alt_allele.isdigit():
-                    self.alt_allele = "N" * int(self.alt_allele)
-
-                # Convert duplication alleles.
-                if self.mutation_type == "dup":
-                    self.alt_allele = self.ref_allele * 2
-
-                # Convert no match alleles.
-                if self.mutation_type == "=":
-                    self.alt_allele = self.ref_allele
-                return
-
-        raise InvalidHGVSName(details, 'genomic allele')
-
-    def _validate(self):
-        """
-        Check for internal inconsistencies in representation
-        """
-        if self.start > self.end:
-            raise InvalidHGVSName(reason="Coordinates are nonincreasing")
-
-    def __repr__(self):
-        try:
-            return "HGVSName('%s')" % self.format()
-        except NotImplementedError:
-            return "HGVSName('%s')" % self.name
-
-    def __unicode__(self):
-        return self.format()
-
-    def format(self, use_prefix=True, use_gene=True, use_counsyl=False):
-        """Generate a HGVS name as a string."""
-
-        if self.kind == 'c':
-            allele = 'c.' + self.format_cdna()
-        elif self.kind == 'p':
-            allele = 'p.' + self.format_protein()
-        elif self.kind == 'g':
-            allele = 'g.' + self.format_genome()
-        else:
-            raise NotImplementedError("not implemented: '%s'" % self.kind)
-
-        prefix = self.format_prefix(use_gene=use_gene) if use_prefix else ''
-
-        if prefix:
-            return prefix + ':' + allele
-        else:
-            return allele
-
-    def format_prefix(self, use_gene=True):
-        """
-        Generate HGVS trancript/gene prefix.
-
-        Some examples of full hgvs names with transcript include:
-          NM_007294.3:c.2207A>C
-          NM_007294.3(BRCA1):c.2207A>C
-        """
-
-        if self.kind == 'g':
-            if self.chrom:
-                return self.chrom
-
-        if self.transcript:
-            if use_gene and self.gene:
-                return '%s(%s)' % (self.transcript, self.gene)
-            else:
-                return self.transcript
-        else:
-            if use_gene:
-                return self.gene
-            else:
-                return ''
-
-    def format_cdna_coords(self):
-        """
-        Generate HGVS cDNA coordinates string.
-        """
-        # Format coordinates.
-        if self.cdna_start == self.cdna_end:
-            return str(self.cdna_start)
-        else:
-            return "%s_%s" % (self.cdna_start, self.cdna_end)
-
-    def format_dna_allele(self):
-        """
-        Generate HGVS DNA allele.
-        """
-        if self.mutation_type == '=':
-            # No change.
-            # example: 101A=
-            return self.ref_allele + '='
-
-        if self.mutation_type == '>':
-            # SNP.
-            # example: 101A>C
-            return self.ref_allele + '>' + self.alt_allele
-
-        elif self.mutation_type == 'delins':
-            # Indel.
-            # example: 112_117delAGGTCAinsTG, 112_117delinsTG
-            return 'del' + self.ref_allele + 'ins' + self.alt_allele
-
-        elif self.mutation_type in ('del', 'dup'):
-            # Delete, duplication.
-            # example: 1000_1003delATG, 1000_1003dupATG
-            return self.mutation_type + self.ref_allele
-
-        elif self.mutation_type == 'ins':
-            # Insert.
-            # example: 1000_1001insATG
-            return self.mutation_type + self.alt_allele
-
-        elif self.mutation_type == 'inv':
-            return self.mutation_type
-
-        else:
-            raise AssertionError(
-                "unknown mutation type: '%s'" % self.mutation_type)
-
-    def format_cdna(self):
-        """
-        Generate HGVS cDNA allele.
-
-        Some examples include:
-          Substitution: 101A>C,
-          Indel: 3428delCinsTA, 1000_1003delATG, 1000_1001insATG
-        """
-        return self.format_cdna_coords() + self.format_dna_allele()
-
-    def format_protein(self):
-        """
-        Generate HGVS protein name.
-
-        Some examples include:
-          No change: Glu1161=
-          Change: Glu1161Ser
-          Frameshift: Glu1161_Ser1164?fs
-        """
-        if (self.start == self.end and
-                self.ref_allele == self.ref2_allele == self.alt_allele):
-            # Match.
-            # Example: Glu1161=
-            pep_extra = self.pep_extra if self.pep_extra else '='
-            return self.ref_allele + str(self.start) + pep_extra
-
-        elif (self.start == self.end and
-                self.ref_allele == self.ref2_allele and
-                self.ref_allele != self.alt_allele):
-            # Change.
-            # Example: Glu1161Ser
-            return (self.ref_allele + str(self.start) +
-                    self.alt_allele + self.pep_extra)
-
-        elif self.start != self.end:
-            # Range change.
-            # Example: Glu1161_Ser1164?fs
-            return (self.ref_allele + str(self.start) + '_' +
-                    self.ref2_allele + str(self.end) +
-                    self.pep_extra)
-
-        else:
-            raise NotImplementedError('protein name formatting.')
-
-    def format_coords(self):
-        """
-        Generate HGVS cDNA coordinates string.
-        """
-        # Format coordinates.
-        if self.start == self.end:
-            return str(self.start)
-        else:
-            return "%s_%s" % (self.start, self.end)
-
-    def format_genome(self):
-        """
-        Generate HGVS genomic allele.
-
-        Som examples include:
-          Substitution: 1000100A>T
-          Indel: 1000100_1000102delATG
-        """
-        return self.format_coords() + self.format_dna_allele()
-
-    def get_coords(self, transcript=None):
-        """Return genomic coordinates of reference allele."""
-        if self.kind == 'c':
-            chrom = transcript.tx_position.chrom
-            start = cdna_to_genomic_coord(transcript, self.cdna_start)
-            end = cdna_to_genomic_coord(transcript, self.cdna_end)
-
-            if not transcript.tx_position.is_forward_strand:
-                if end > start:
-                    raise AssertionError(
-                        "cdna_start cannot be greater than cdna_end")
-                start, end = end, start
-            else:
-                if start > end:
-                    raise AssertionError(
-                        "cdna_start cannot be greater than cdna_end")
-
-            if self.mutation_type == "ins":
-                # Inserts have empty interval.
-                if start < end:
-                    start += 1
-                    end -= 1
-                else:
-                    end = start - 1
-
-            elif self.mutation_type == "dup":
-                end = start - 1
-
-        elif self.kind == 'g':
-            chrom = self.chrom
-            start = self.start
-            end = self.end
-
-        else:
-            raise NotImplementedError(
-                'Coordinates are not available for this kind of HGVS name "%s"'
-                % self.kind)
-
-        return chrom, start, end
-
-    def get_vcf_coords(self, transcript=None):
-        """Return genomic coordinates of reference allele in VCF-style."""
-        chrom, start, end = self.get_coords(transcript)
-
-        # Inserts and deletes require left-padding by 1 base
-        if self.mutation_type in ("=", ">"):
-            pass
-        elif self.mutation_type in ("del", "ins", "dup", "delins"):
-            # Indels have left-padding.
-            start -= 1
-        else:
-            raise NotImplementedError("Unknown mutation_type '%s'" %
-                                      self.mutation_type)
-        return chrom, start, end
-
-    def get_ref_alt(self, is_forward_strand=True):
-        """Return reference and alternate alleles."""
-        if self.kind == 'p':
-            raise NotImplementedError(
-                'get_ref_alt is not implemented for protein HGVS names')
-        alleles = [self.ref_allele, self.alt_allele]
-
-        # Represent duplications are inserts.
-        if self.mutation_type == "dup":
-            alleles[0] = ""
-            alleles[1] = alleles[1][:len(alleles[1]) // 2]
-
-        if is_forward_strand:
-            return alleles
-        else:
-            return tuple(map(revcomp, alleles))
-
-
 def hgvs_justify_dup(chrom, offset, ref, alt, genome):
     """
     Determines if allele is a duplication and justifies.
@@ -1290,8 +155,10 @@ def hgvs_justify_indel(chrom, offset, ref, alt, strand, genome):
         return chrom, offset, ref, alt
 
     # Get genomic sequence around the lesion.
-    start = max(offset - 100, 0)
-    end = offset + 100
+    window_size = 100
+    size = window_size + max(len(ref), len(alt))
+    start = max(offset - size, 0)
+    end = offset + size
     seq = str(genome[str(chrom)][start - 1:end]).upper()
     cds_offset = offset - start
 
@@ -1344,7 +211,8 @@ def hgvs_normalize_variant(chrom, offset, ref, alt, genome, transcript=None):
 
 def parse_hgvs_name(hgvs_name, genome, transcript=None,
                     get_transcript=lambda name: None,
-                    flank_length=30, normalize=True, lazy=False):
+                    flank_length=30, normalize=True, lazy=False,
+                    indels_start_with_same_base=True):
     """
     Parse an HGVS name into (chrom, start, end, ref, alt)
 
@@ -1353,15 +221,16 @@ def parse_hgvs_name(hgvs_name, genome, transcript=None,
     transcript: Transcript corresponding to HGVS name.
     normalize: If True, normalize allele according to VCF standard.
     lazy: If True, discard version information from incoming transcript/gene.
+    indels_start_with_same_base: When normalizing, don't strip common prefix from indels
     """
     hgvs = HGVSName(hgvs_name)
 
     # Determine transcript.
-    if hgvs.kind == 'c' and not transcript:
+    if hgvs.kind in ('c', 'n') and not transcript:
         if '.' in hgvs.transcript and lazy:
-            hgvs.transcript, version = hgvs.transcript.split('.')
+            hgvs.transcript, _ = hgvs.transcript.split('.')
         elif '.' in hgvs.gene and lazy:
-            hgvs.gene, version = hgvs.gene.split('.')
+            hgvs.gene, _ = hgvs.gene.split('.')
         if get_transcript:
             if hgvs.transcript:
                 transcript = get_transcript(hgvs.transcript)
@@ -1377,11 +246,12 @@ def parse_hgvs_name(hgvs_name, genome, transcript=None,
                               transcript.tx_position.chrom_stop,
                               hgvs.transcript)
 
-    chrom, start, end, ref, alt = get_vcf_allele(hgvs, genome, transcript)
+    chrom, start, _, ref, alt = get_vcf_allele(hgvs, genome, transcript)
     if normalize:
-        chrom, start, ref, [alt] = normalize_variant(
-            chrom, start, ref, [alt], genome,
-            flank_length=flank_length).variant
+        nv = normalize_variant(chrom, start, ref, [alt], genome,
+                               flank_length=flank_length,
+                               indels_start_with_same_base=indels_start_with_same_base)
+        chrom, start, ref, [alt] = nv.variant
     return (chrom, start, ref, alt)
 
 
@@ -1408,35 +278,39 @@ def variant_to_hgvs_name(chrom, offset, ref, alt, genome, transcript,
     hgvs = HGVSName()
 
     # Populate coordinates.
+    if mutation_type == 'ins':
+        # Insert uses coordinates around the insert point.
+        offset_start = offset - 1
+        offset_end = offset
+    else:
+        offset_start = offset
+        offset_end = offset + len(ref) - 1
+
     if not transcript:
         # Use genomic coordinate when no transcript is available.
         hgvs.kind = 'g'
-        hgvs.start = offset
-        hgvs.end = offset + len(ref) - 1
+        hgvs.start = offset_start
+        hgvs.end = offset_end
     else:
         # Use cDNA coordinates.
-        hgvs.kind = 'c'
+        if transcript.is_coding:
+            hgvs.kind = 'c'
+        else:
+            hgvs.kind = 'n'
+
         is_single_base_indel = (
             (mutation_type == 'ins' and len(alt) == 1) or
             (mutation_type in ('del', 'delins', 'dup') and len(ref) == 1))
 
         if mutation_type == '>' or (use_counsyl and is_single_base_indel):
             # Use a single coordinate.
-            hgvs.cdna_start = genomic_to_cdna_coord(transcript, offset)
+            hgvs.cdna_start = transcript.genomic_to_cdna_coord(offset)
             hgvs.cdna_end = hgvs.cdna_start
         else:
-            # Use a range of coordinates.
-            if mutation_type == 'ins':
-                # Insert uses coordinates around the insert point.
-                offset_start = offset - 1
-                offset_end = offset
-            else:
-                offset_start = offset
-                offset_end = offset + len(ref) - 1
             if transcript.strand == '-':
                 offset_start, offset_end = offset_end, offset_start
-            hgvs.cdna_start = genomic_to_cdna_coord(transcript, offset_start)
-            hgvs.cdna_end = genomic_to_cdna_coord(transcript, offset_end)
+            hgvs.cdna_start = transcript.genomic_to_cdna_coord(offset_start)
+            hgvs.cdna_end = transcript.genomic_to_cdna_coord(offset_end)
 
     # Populate prefix.
     if transcript:
diff --git a/pyhgvs/models.py b/pyhgvs/models.py
deleted file mode 100644
index b26b8c4..0000000
--- a/pyhgvs/models.py
+++ /dev/null
@@ -1,152 +0,0 @@
-"""
-Models for representing genomic elements.
-"""
-
-from __future__ import unicode_literals
-
-from collections import namedtuple
-
-
-class Position(object):
-    """A position in the genome."""
-
-    def __init__(self, chrom, chrom_start, chrom_stop, is_forward_strand):
-        self.chrom = chrom
-        self.chrom_start = chrom_start
-        self.chrom_stop = chrom_stop
-        self.is_forward_strand = is_forward_strand
-
-    def __repr__(self):
-        return "<Position %s[%d:%d]>" % (
-            self.chrom, self.chrom_start, self.chrom_stop)
-
-
-class Gene(object):
-    def __init__(self, name):
-        self.name = name
-
-
-class Transcript(object):
-    """RefGene Transcripts for hg19
-
-    A gene may have multiple transcripts with different combinations of exons.
-    """
-
-    def __init__(self, name, version, gene, tx_position, cds_position,
-                 is_default=False, exons=None):
-        self.name = name
-        self.version = version
-        self.gene = Gene(gene)
-        self.tx_position = tx_position
-        self.cds_position = cds_position
-        self.is_default = is_default
-        self.exons = exons if exons else []
-
-    @property
-    def full_name(self):
-        if self.version is not None:
-            return '%s.%d' % (self.name, self.version)
-        else:
-            return self.name
-
-    @property
-    def is_coding(self):
-        # Coding transcripts have CDS with non-zero length.
-        return (self.cds_position.chrom_stop -
-                self.cds_position.chrom_start > 0)
-
-    @property
-    def strand(self):
-        return ('+' if self.tx_position.is_forward_strand else '-')
-
-    @property
-    def coding_exons(self):
-        return [exon.get_as_interval(coding_only=True)
-                for exon in self.exons]
-
-
-BED6Interval_base = namedtuple(
-    "BED6Interval_base", (
-        "chrom",
-        "chrom_start",
-        "chrom_end",
-        "name",
-        "score",
-        "strand"))
-
-
-class BED6Interval(BED6Interval_base):
-    def distance(self, offset):
-        """Return the distance to the interval.
-
-        if offset is inside the exon, distance is zero.
-        otherwise, distance is the distance to the nearest edge.
-
-        distance is positive if the exon comes after the offset.
-        distance is negative if the exon comes before the offset.
-        """
-
-        start = self.chrom_start + 1
-        end = self.chrom_end
-
-        if start <= offset <= end:
-            return 0
-
-        start_distance = start - offset
-        end_distance = offset - end
-
-        if abs(start_distance) < abs(end_distance):
-            return start_distance
-        else:
-            return -end_distance
-
-
-class Exon(object):
-    def __init__(self, transcript, tx_position, exon_number):
-        self.transcript = transcript
-        self.tx_position = tx_position
-        self.exon_number = exon_number
-
-    @property
-    def get_exon_name(self):
-        return "%s.%d" % (self.transcript.name, self.exon_number)
-
-    def get_as_interval(self, coding_only=False):
-        """Returns the coding region for this exon as a BED6Interval.
-
-        This function returns a BED6Interval objects containing  position
-        information for this exon. This may be used as input for
-        pybedtools.create_interval_from_list() after casting chrom_start
-        and chrom_end as strings.
-
-        coding_only: only include exons in the coding region
-
-        """
-
-        exon_start = self.tx_position.chrom_start
-        exon_stop = self.tx_position.chrom_stop
-
-        # Get only exon coding region if requested
-        if coding_only:
-            if (exon_stop <= self.transcript.cds_position.chrom_start or
-                    exon_start >= self.transcript.cds_position.chrom_stop):
-                return None
-            exon_start = max(exon_start,
-                             self.transcript.cds_position.chrom_start)
-            exon_stop = min(
-                max(exon_stop, self.transcript.cds_position.chrom_start),
-                self.transcript.cds_position.chrom_stop)
-
-        return BED6Interval(
-            self.tx_position.chrom,
-            exon_start,
-            exon_stop,
-            self.get_exon_name,
-            '.',
-            self.strand,
-        )
-
-    @property
-    def strand(self):
-        strand = '+' if self.tx_position.is_forward_strand else '-'
-        return strand
diff --git a/pyhgvs/models/__init__.py b/pyhgvs/models/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/pyhgvs/models/cdna.py b/pyhgvs/models/cdna.py
new file mode 100644
index 0000000..bc1280f
--- /dev/null
+++ b/pyhgvs/models/cdna.py
@@ -0,0 +1,123 @@
+import re
+
+CDNA_START_CODON = 'cdna_start'
+CDNA_STOP_CODON = 'cdna_stop'
+
+
+class CDNACoord(object):
+    """
+    A HGVS cDNA-based coordinate.
+
+    A cDNA coordinate can take one of these forms:
+
+    N = nucleotide N in protein coding sequence (e.g. 11A>G)
+
+    -N = nucleotide N 5' of the ATG translation initiation codon (e.g. -4A>G)
+         NOTE: so located in the 5'UTR or 5' of the transcription initiation
+         site (upstream of the gene, incl. promoter)
+
+    *N = nucleotide N 3' of the translation stop codon (e.g. *6A>G)
+         NOTE: so located in the 3'UTR or 3' of the polyA-addition site
+         (including downstream of the gene)
+
+    N+M = nucleotide M in the intron after (3' of) position N in the coding DNA
+          reference sequence (e.g. 30+4A>G)
+
+    N-M = nucleotide M in the intron before (5' of) position N in the coding
+          DNA reference sequence (e.g. 301-2A>G)
+
+    -N+M / -N-M = nucleotide in an intron in the 5'UTR (e.g. -45+4A>G)
+
+    *N+M / *N-M = nucleotide in an intron in the 3'UTR (e.g. *212-2A>G)
+    """
+
+    def __init__(self, coord=0, offset=0, landmark=CDNA_START_CODON,
+                 string=''):
+        """
+        coord: main coordinate along cDNA on the same strand as the transcript
+
+        offset: an additional genomic offset from the main coordinate.  This
+                allows referencing non-coding (e.g. intronic) positions.
+                Offset is also interpreted on the coding strand.
+
+        landmark: ('cdna_start', 'cdna_stop') indicating that 'coord'
+                  is relative to one of these landmarks.
+
+        string: a coordinate from an HGVS name.  If given coord, offset, and
+                landmark should not be specified.
+        """
+
+        if string:
+            if coord != 0 or offset != 0 or landmark != CDNA_START_CODON:
+                raise ValueError("coord, offset, and landmark should not "
+                                 "be given with string argument")
+
+            self.parse(string)
+        else:
+            self.coord = coord
+            self.offset = offset
+            self.landmark = landmark
+
+    def parse(self, coord_text):
+        """
+        Parse a HGVS formatted cDNA coordinate.
+        """
+
+        match = re.match(r"(|-|\*)(\d+)((-|\+)(\d+))?", coord_text)
+        if not match:
+            raise ValueError("unknown coordinate format '%s'" % coord_text)
+        coord_prefix, coord, _, offset_prefix, offset = match.groups()
+
+        self.coord = int(coord)
+        self.offset = int(offset) if offset else 0
+
+        if offset_prefix == '-':
+            self.offset *= -1
+        elif offset_prefix == '+' or offset is None:
+            pass
+        else:
+            raise ValueError("unknown offset_prefix '%s'" % offset_prefix)
+
+        if coord_prefix == '':
+            self.landmark = CDNA_START_CODON
+        elif coord_prefix == "-":
+            self.coord *= -1
+            self.landmark = CDNA_START_CODON
+        elif coord_prefix == '*':
+            self.landmark = CDNA_STOP_CODON
+        else:
+            raise ValueError("unknown coord_prefix '%s'" % coord_prefix)
+        return self
+
+    def __str__(self):
+        """
+        Return a formatted cDNA coordinate
+        """
+        if self.landmark == CDNA_STOP_CODON:
+            coord_prefix = '*'
+        else:
+            coord_prefix = ''
+
+        if self.offset < 0:
+            offset = '%d' % self.offset
+        elif self.offset > 0:
+            offset = '+%d' % self.offset
+        else:
+            offset = ''
+
+        return '%s%d%s' % (coord_prefix, self.coord, offset)
+
+    def __eq__(self, other):
+        """Equality operator."""
+        return ((self.coord, self.offset, self.landmark) ==
+                (other.coord, other.offset, other.landmark))
+
+    def __repr__(self):
+        """
+        Returns a string representation of a cDNA coordinate.
+        """
+        if self.landmark != CDNA_START_CODON:
+            return "CDNACoord(%d, %d, '%s')" % (
+                self.coord, self.offset, self.landmark)
+        else:
+            return "CDNACoord(%d, %d)" % (self.coord, self.offset)
diff --git a/pyhgvs/models/genome.py b/pyhgvs/models/genome.py
new file mode 100644
index 0000000..4cfc87d
--- /dev/null
+++ b/pyhgvs/models/genome.py
@@ -0,0 +1,47 @@
+class ChromosomeSubset(object):
+    """
+    Allow direct access to a subset of the chromosome.
+    """
+
+    def __init__(self, name, genome=None):
+        self.name = name
+        self.genome = genome
+
+    def __getitem__(self, key):
+        """Return sequence from region [start, end)
+
+        Coordinates are 0-based, end-exclusive."""
+        if isinstance(key, slice):
+            start, end = (key.start, key.stop)
+            start -= self.genome.start
+            end -= self.genome.start
+            return self.genome.genome[self.genome.seqid][start:end]
+        else:
+            raise TypeError('Expected a slice object but '
+                            'received a {0}.'.format(type(key)))
+
+    def __repr__(self):
+        return 'ChromosomeSubset("%s")' % self.name
+
+
+class GenomeSubset(object):
+    """
+    Allow the direct access of a subset of the genome.
+    """
+
+    def __init__(self, genome, chrom, start, end, seqid):
+        self.genome = genome
+        self.chrom = chrom
+        self.start = start
+        self.end = end
+        self.seqid = seqid
+        self._chroms = {}
+
+    def __getitem__(self, chrom):
+        """Return a chromosome by its name."""
+        if chrom in self._chroms:
+            return self._chroms[chrom]
+        else:
+            chromosome = ChromosomeSubset(chrom, self)
+            self._chroms[chrom] = chromosome
+            return chromosome
\ No newline at end of file
diff --git a/pyhgvs/models/hgvs_name.py b/pyhgvs/models/hgvs_name.py
new file mode 100644
index 0000000..6d6cc46
--- /dev/null
+++ b/pyhgvs/models/hgvs_name.py
@@ -0,0 +1,814 @@
+"""
+HGVS language currently implemented.
+
+HGVS = ALLELE
+     | PREFIX_NAME : ALLELE
+
+PREFIX_NAME = TRANSCRIPT
+            | TRANSCRIPT '(' GENE ')'
+
+TRANSCRIPT = TRANSCRIPT_NAME
+           | TRANSCRIPT_NAME '.' TRANSCRIPT_VERSION
+
+TRANSCRIPT_VERSION = NUMBER
+
+ALLELE = 'c.' CDNA_ALLELE    # cDNA
+       | 'g.' GENOMIC_ALLELE # genomic
+       | 'm.' MIT_ALLELE     # mitochondrial sequence
+       | 'n.' NC_ALLELE      # non-coding RNA reference sequence
+       | 'r.' RNA_ALLELE     # RNA sequence (like r.76a>u)
+       | 'p.' PROTEIN_ALLELE # protein sequence (like  p.Lys76Asn)
+
+NC_ALLELE =
+RNA_ALLELE =
+CDNA_ALLELE = CDNA_COORD SINGLE_BASE_CHANGE
+            | CDNA_COORD_RANGE MULTI_BASE_CHANGE
+
+GENOMIC_ALLELE =
+MIT_ALLELE = COORD SINGLE_BASE_CHANGE
+           | COORD_RANGE MULTI_BASE_CHANGE
+
+SINGLE_BASE_CHANGE = CDNA_ALLELE = CDNA_COORD BASE '='        # no change
+                   | CDNA_COORD BASE '>' BASE                 # substitution
+                   | CDNA_COORD 'ins' BASE                    # 1bp insertion
+                   | CDNA_COORD 'del' BASE                    # 1bp deletion
+                   | CDNA_COORD 'dup' BASE                    # 1bp duplication
+                   | CDNA_COORD 'ins'                         # 1bp insertion
+                   | CDNA_COORD 'del'                         # 1bp deletion
+                   | CDNA_COORD 'dup'                         # 1bp duplication
+                   | CDNA_COORD 'del' BASE 'ins' BASE         # 1bp indel
+                   | CDNA_COORD 'delins' BASE                 # 1bp indel
+
+MULTI_BASE_CHANGE = COORD_RANGE 'del' BASES             # deletion
+                  | COORD_RANGE 'ins' BASES             # insertion
+                  | COORD_RANGE 'dup' BASES             # duplication
+                  | COORD_RANGE 'del'                   # deletion
+                  | COORD_RANGE 'dup'                   # duplication
+                  | COORD_RANGE 'del' BASES 'ins' BASES # indel
+                  | COORD_RANGE 'delins' BASES          # indel
+
+
+AMINO1 = [GAVLIMFWPSTCYNQDEKRH]
+
+AMINO3 = 'Gly' | 'Ala' | 'Val' | 'Leu' | 'Ile' | 'Met' | 'Phe' | 'Trp' | 'Pro'
+       | 'Ser' | 'Thr' | 'Cys' | 'Tyr' | 'Asn' | 'Gln' | 'Asp' | 'Glu' | 'Lys'
+       | 'Arg' | 'His'
+
+PROTEIN_ALLELE = AMINO3 COORD '='               # no peptide change
+               | AMINO1 COORD '='               # no peptide change
+               | AMINO3 COORD AMINO3 PEP_EXTRA  # peptide change
+               | AMINO1 COORD AMINO1 PEP_EXTRA  # peptide change
+               | AMINO3 COORD '_' AMINO3 COORD PEP_EXTRA        # indel
+               | AMINO1 COORD '_' AMINO1 COORD PEP_EXTRA        # indel
+               | AMINO3 COORD '_' AMINO3 COORD PEP_EXTRA AMINO3 # indel
+               | AMINO1 COORD '_' AMINO1 COORD PEP_EXTRA AMINO1 # indel
+
+# A genomic range:
+COORD_RANGE = COORD '_' COORD
+
+# A cDNA range:
+CDNA_COORD_RANGE = CDNA_COORD '_' CDNA_COORD
+
+# A cDNA coordinate:
+CDNA_COORD = COORD_PREFIX COORD
+           | COORD_PREFIX COORD OFFSET_PREFIX OFFSET
+COORD_PREFIX = '' | '-' | '*'
+COORD = NUMBER
+OFFSET_PREFIX = '-' | '+'
+OFFSET = NUMBER
+
+# Primatives:
+NUMBER = \d+
+BASE = [ACGT]
+BASES = BASE+
+
+"""
+
+import re
+
+from .cdna import CDNACoord
+from .variants import revcomp
+
+CHROM_PREFIX = 'chr'
+
+
+class HGVSRegex(object):
+    """
+    All regular expression for HGVS names.
+    """
+
+    # DNA syntax
+    # http://www.hgvs.org/mutnomen/standards.html#nucleotide
+    BASE = "[acgtbdhkmnrsvwyACGTBDHKMNRSVWY]|\d+"
+    BASES = "[acgtbdhkmnrsvwyACGTBDHKMNRSVWY]+|\d+"
+    DNA_REF = "(?P<ref>" + BASES + ")"
+    DNA_ALT = "(?P<alt>" + BASES + ")"
+
+    # Mutation types
+    EQUAL = "(?P<mutation_type>=)"
+    SUB = "(?P<mutation_type>>)"
+    INS = "(?P<mutation_type>ins)"
+    DEL = "(?P<mutation_type>del)"
+    DUP = "(?P<mutation_type>dup)"
+
+    # Simple coordinate syntax
+    COORD_START = "(?P<start>\d+)"
+    COORD_END = "(?P<end>\d+)"
+    COORD_RANGE = COORD_START + "_" + COORD_END
+
+    # cDNA coordinate syntax
+    CDNA_COORD = ("(?P<coord_prefix>|-|\*)(?P<coord>\d+)"
+                  "((?P<offset_prefix>-|\+)(?P<offset>\d+))?")
+    CDNA_START = ("(?P<start>(?P<start_coord_prefix>|-|\*)(?P<start_coord>\d+)"
+                  "((?P<start_offset_prefix>-|\+)(?P<start_offset>\d+))?)")
+    CDNA_END = (r"(?P<end>(?P<end_coord_prefix>|-|\*)(?P<end_coord>\d+)"
+                "((?P<end_offset_prefix>-|\+)(?P<end_offset>\d+))?)")
+    CDNA_RANGE = CDNA_START + "_" + CDNA_END
+
+    # cDNA allele syntax
+    CDNA_ALLELE = [
+        # No change
+        CDNA_START + EQUAL,
+        CDNA_START + DNA_REF + EQUAL,
+
+        # Substitution
+        CDNA_START + DNA_REF + SUB + DNA_ALT,
+
+        # 1bp insertion, deletion, duplication
+        CDNA_START + INS + DNA_ALT,
+        CDNA_START + DEL + DNA_REF,
+        CDNA_START + DUP + DNA_REF,
+        CDNA_START + DEL,
+        CDNA_START + DUP,
+
+        # Insertion, deletion, duplication
+        CDNA_RANGE + INS + DNA_ALT,
+        CDNA_RANGE + DEL + DNA_REF,
+        CDNA_RANGE + DUP + DNA_REF,
+        CDNA_RANGE + DEL,
+        CDNA_RANGE + DUP,
+
+        # Indels
+        "(?P<delins>" + CDNA_START + 'del' + DNA_REF + 'ins' + DNA_ALT + ")",
+        "(?P<delins>" + CDNA_RANGE + 'del' + DNA_REF + 'ins' + DNA_ALT + ")",
+        "(?P<delins>" + CDNA_START + 'delins' + DNA_ALT + ")",
+        "(?P<delins>" + CDNA_RANGE + 'delins' + DNA_ALT + ")",
+    ]
+
+    CDNA_ALLELE_REGEXES = [re.compile("^" + regex + "$")
+                           for regex in CDNA_ALLELE]
+
+    # Peptide syntax
+    PEP = "([A-Z]([a-z]{2}))+"
+    PEP_REF = "(?P<ref>" + PEP + ")"
+    PEP_REF2 = "(?P<ref2>" + PEP + ")"
+    PEP_ALT = "(?P<alt>" + PEP + ")"
+
+    PEP_EXTRA = "(?P<extra>(|=|\?)(|fs))"
+
+    # Peptide allele syntax
+    PEP_ALLELE = [
+        # No peptide change
+        # Example: Glu1161=
+        PEP_REF + COORD_START + PEP_EXTRA,
+
+        # Peptide change
+        # Example: Glu1161Ser
+        PEP_REF + COORD_START + PEP_ALT + PEP_EXTRA,
+
+        # Peptide indel
+        # Example: Glu1161_Ser1164?fs
+        "(?P<delins>" + PEP_REF + COORD_START + "_" + PEP_REF2 + COORD_END +
+        PEP_EXTRA + ")",
+        "(?P<delins>" + PEP_REF + COORD_START + "_" + PEP_REF2 + COORD_END +
+        PEP_ALT + PEP_EXTRA + ")",
+    ]
+
+    PEP_ALLELE_REGEXES = [re.compile("^" + regex + "$")
+                          for regex in PEP_ALLELE]
+
+    # Genomic allele syntax
+    GENOMIC_ALLELE = [
+        # No change
+        COORD_START + EQUAL,
+        COORD_START + DNA_REF + EQUAL,
+
+        # Substitution
+        COORD_START + DNA_REF + SUB + DNA_ALT,
+
+        # 1bp insertion, deletion, duplication
+        COORD_START + INS + DNA_ALT,
+        COORD_START + DEL + DNA_REF,
+        COORD_START + DUP + DNA_REF,
+        COORD_START + DEL,
+        COORD_START + DUP,
+
+        # Insertion, deletion, duplication
+        COORD_RANGE + EQUAL,
+        COORD_RANGE + INS + DNA_ALT,
+        COORD_RANGE + DEL + DNA_REF,
+        COORD_RANGE + DUP + DNA_REF,
+        COORD_RANGE + DEL,
+        COORD_RANGE + DUP,
+
+        # Indels
+        "(?P<delins>" + COORD_START + 'del' + DNA_REF + 'ins' + DNA_ALT + ")",
+        "(?P<delins>" + COORD_RANGE + 'del' + DNA_REF + 'ins' + DNA_ALT + ")",
+        "(?P<delins>" + COORD_START + 'delins' + DNA_ALT + ")",
+        "(?P<delins>" + COORD_RANGE + 'delins' + DNA_ALT + ")",
+    ]
+
+    GENOMIC_ALLELE_REGEXES = [re.compile("^" + regex + "$")
+                              for regex in GENOMIC_ALLELE]
+
+
+# The RefSeq standard for naming contigs/transcripts/proteins:
+# http://www.ncbi.nlm.nih.gov/books/NBK21091/table/ch18.T.refseq_accession_numbers_and_mole/?report=objectonly  # nopep8
+REFSEQ_PREFIXES = [
+    ('AC_', 'genomic',
+     'Complete genomic molecule, usually alternate assembly'),
+    ('NC_', 'genomic',
+     'Complete genomic molecule, usually reference assembly'),
+    ('NG_', 'genomic', 'Incomplete genomic region'),
+    ('NT_', 'genomic', 'Contig or scaffold, clone-based or WGS'),
+    ('NW_', 'genomic', 'Contig or scaffold, primarily WGS'),
+    ('NS_', 'genomic', 'Environmental sequence'),
+    ('NZ_', 'genomic', 'Unfinished WGS'),
+    ('NM_', 'mRNA', ''),
+    ('NR_', 'RNA', ''),
+    ('XM_', 'mRNA', 'Predicted model'),
+    ('XR_', 'RNA', 'Predicted model'),
+    ('AP_', 'Protein', 'Annotated on AC_ alternate assembly'),
+    ('NP_', 'Protein', 'Associated with an NM_ or NC_ accession'),
+    ('YP_', 'Protein', ''),
+    ('XP_', 'Protein', 'Predicted model, associated with an XM_ accession'),
+    ('ZP_', 'Protein', 'Predicted model, annotated on NZ_ genomic records'),
+]
+
+
+REFSEQ_PREFIX_LOOKUP = dict(
+    (prefix, (kind, description))
+    for prefix, kind, description in REFSEQ_PREFIXES
+)
+
+
+def get_refseq_type(name):
+    """
+    Return the RefSeq type for a refseq name.
+    """
+    prefix = name[:3]
+    return REFSEQ_PREFIX_LOOKUP.get(prefix, (None, ''))[0]
+
+
+class InvalidHGVSName(ValueError):
+    def __init__(self, name='', part='name', reason=''):
+        if name:
+            message = 'Invalid HGVS %s "%s"' % (part, name)
+        else:
+            message = 'Invalid HGVS %s' % part
+        if reason:
+            message += ': ' + reason
+        super(InvalidHGVSName, self).__init__(message)
+
+        self.name = name
+        self.part = part
+        self.reason = reason
+
+
+class HGVSName(object):
+    """
+    Represents a HGVS variant name.
+    """
+
+    def __init__(self, name='', prefix='', chrom='', transcript='', gene='',
+                 kind='', mutation_type=None, start=0, end=0, ref_allele='',
+                 ref2_allele='', alt_allele='',
+                 cdna_start=None, cdna_end=None, pep_extra=''):
+
+        # Full HGVS name.
+        self.name = name
+
+        # Name parts.
+        self.prefix = prefix
+        self.chrom = chrom
+        self.transcript = transcript
+        self.gene = gene
+        self.kind = kind
+        self.mutation_type = mutation_type
+        self.start = start
+        self.end = end
+        self.ref_allele = ref_allele  # reference allele
+        self.ref2_allele = ref2_allele  # reference allele at end of pep indel
+        self.alt_allele = alt_allele  # alternate allele
+
+        # cDNA-specific fields
+        self.cdna_start = cdna_start if cdna_start else CDNACoord()
+        self.cdna_end = cdna_end if cdna_end else CDNACoord()
+
+        # Protein-specific fields
+        self.pep_extra = pep_extra
+
+        if name:
+            self.parse(name)
+
+    def parse(self, name):
+        """Parse a HGVS name."""
+        # Does HGVS name have transcript/gene prefix?
+        if ':' in name:
+            prefix, allele = name.split(':', 1)
+        else:
+            prefix = ''
+            allele = name
+
+        self.name = name
+
+        # Parse prefix and allele.
+        self.parse_allele(allele)
+        self.parse_prefix(prefix, self.kind)
+        self._validate()
+
+    def parse_prefix(self, prefix, kind):
+        """
+        Parse a HGVS prefix (gene/transcript/chromosome).
+
+        Some examples of full hgvs names with transcript include:
+          NM_007294.3:c.2207A>C
+          NM_007294.3(BRCA1):c.2207A>C
+          BRCA1{NM_007294.3}:c.2207A>C
+        """
+
+        self.prefix = prefix
+
+        # No prefix.
+        if prefix == '':
+            self.chrom = ''
+            self.transcript = ''
+            self.gene = ''
+            return
+
+        # Transcript and gene given with parens.
+        # example: NM_007294.3(BRCA1):c.2207A>C
+        match = re.match(r"^(?P<transcript>[^(]+)\((?P<gene>[^)]+)\)$", prefix)
+        if match:
+            self.transcript = match.group('transcript')
+            self.gene = match.group('gene')
+            return
+
+        # Transcript and gene given with braces.
+        # example: BRCA1{NM_007294.3}:c.2207A>C
+        match = re.match(r"^(?P<gene>[^{]+){(?P<transcript>[^}]+)}$", prefix)
+        if match:
+            self.transcript = match.group('transcript')
+            self.gene = match.group('gene')
+            return
+
+        # Determine using Ensembl type.
+        if prefix.startswith('ENST'):
+            self.transcript = prefix
+            return
+
+        # Determine using LRG type.
+        if prefix.startswith('LRG_'):
+            self.transcript = prefix
+            return
+
+        # Determine using refseq type.
+        refseq_type = get_refseq_type(prefix)
+        if refseq_type in ('mRNA', 'RNA'):
+            self.transcript = prefix
+            return
+
+        # Determine using refseq type.
+        if prefix.startswith(CHROM_PREFIX) or refseq_type == 'genomic':
+            self.chrom = prefix
+            return
+
+        # Assume gene name.
+        self.gene = prefix
+
+    def parse_allele(self, allele):
+        """
+        Parse a HGVS allele description.
+
+        Some examples include:
+          cDNA substitution: c.101A>C,
+          cDNA indel: c.3428delCinsTA, c.1000_1003delATG, c.1000_1001insATG
+          No protein change: p.Glu1161=
+          Protein change: p.Glu1161Ser
+          Protein frameshift: p.Glu1161_Ser1164?fs
+          Genomic substitution: g.1000100A>T
+          Genomic indel: g.1000100_1000102delATG
+        """
+        if '.' not in allele:
+            raise InvalidHGVSName(allele, 'allele',
+                                  'expected kind "c.", "p.", "g.", etc')
+
+        # Determine HGVS name kind.
+        kind, details = allele.split('.', 1)
+        self.kind = kind
+        self.mutation_type = None
+
+        if kind in ("c", 'n'):
+            self.parse_cdna(details)
+            if kind == 'n':  # Ensure no 3'UTR or 5'UTR coords in non-coding
+                if self.cdna_start.coord < 0:
+                    raise InvalidHGVSName(allele, 'allele',
+                                          "Non-coding transcript cannot contain negative (5'UTR) coordinates")
+                if self.cdna_start.landmark == 'cdna_stop' or self.cdna_end and self.cdna_end.landmark == 'cdna_stop':
+                    raise InvalidHGVSName(allele, 'allele',
+                                          "Non-coding transcript cannot contain '*' (3'UTR) coordinates")
+        elif kind == "p":
+            self.parse_protein(details)
+        elif kind in ("g", 'm'):
+            self.parse_genome(details)
+        else:
+            raise NotImplementedError("unknown kind: %s" % allele)
+
+    def parse_cdna(self, details):
+        """
+        Parse a HGVS cDNA name.
+
+        Some examples include:
+          Substitution: 101A>C,
+          Indel: 3428delCinsTA, 1000_1003delATG, 1000_1001insATG
+        """
+        for regex in HGVSRegex.CDNA_ALLELE_REGEXES:
+            match = re.match(regex, details)
+            if match:
+                groups = match.groupdict()
+
+                # Parse mutation type.
+                if groups.get('delins'):
+                    self.mutation_type = 'delins'
+                else:
+                    self.mutation_type = groups['mutation_type']
+
+                # Parse coordinates.
+                self.cdna_start = CDNACoord(string=groups.get('start'))
+                if groups.get('end'):
+                    self.cdna_end = CDNACoord(string=groups.get('end'))
+                else:
+                    self.cdna_end = CDNACoord(string=groups.get('start'))
+
+                # Parse alleles.
+                self.ref_allele = groups.get('ref', '')
+                self.alt_allele = groups.get('alt', '')
+
+                # Convert numerical allelles.
+                if self.ref_allele.isdigit():
+                    self.ref_allele = "N" * int(self.ref_allele)
+                if self.alt_allele.isdigit():
+                    self.alt_allele = "N" * int(self.alt_allele)
+
+                # Convert duplication alleles.
+                if self.mutation_type == "dup":
+                    self.alt_allele = self.ref_allele * 2
+
+                # Convert no match alleles.
+                if self.mutation_type == "=":
+                    self.alt_allele = self.ref_allele
+                return
+
+        raise InvalidHGVSName(details, 'cDNA allele')
+
+    def parse_protein(self, details):
+        """
+        Parse a HGVS protein name.
+
+        Some examples include:
+          No change: Glu1161=
+          Change: Glu1161Ser
+          Frameshift: Glu1161_Ser1164?fs
+        """
+        for regex in HGVSRegex.PEP_ALLELE_REGEXES:
+            match = re.match(regex, details)
+            if match:
+                groups = match.groupdict()
+
+                # Parse mutation type.
+                if groups.get('delins'):
+                    self.mutation_type = 'delins'
+                else:
+                    self.mutation_type = '>'
+
+                # Parse coordinates.
+                self.start = int(groups.get('start'))
+                if groups.get('end'):
+                    self.end = int(groups.get('end'))
+                else:
+                    self.end = self.start
+
+                # Parse alleles.
+                self.ref_allele = groups.get('ref', '')
+                if groups.get('ref2'):
+                    self.ref2_allele = groups.get('ref2')
+                    self.alt_allele = groups.get('alt', '')
+                else:
+                    # If alt is not given, assume matching with ref
+                    self.ref2_allele = self.ref_allele
+                    self.alt_allele = groups.get(
+                        'alt', self.ref_allele)
+
+                self.pep_extra = groups.get('extra')
+                return
+
+        raise InvalidHGVSName(details, 'protein allele')
+
+    def parse_genome(self, details):
+        """
+        Parse a HGVS genomic name.
+
+        Som examples include:
+          Substitution: 1000100A>T
+          Indel: 1000100_1000102delATG
+        """
+
+        for regex in HGVSRegex.GENOMIC_ALLELE_REGEXES:
+            match = re.match(regex, details)
+            if match:
+                groups = match.groupdict()
+
+                # Parse mutation type.
+                if groups.get('delins'):
+                    self.mutation_type = 'delins'
+                else:
+                    self.mutation_type = groups['mutation_type']
+
+                # Parse coordinates.
+                self.start = int(groups.get('start'))
+                if groups.get('end'):
+                    self.end = int(groups.get('end'))
+                else:
+                    self.end = self.start
+
+                # Parse alleles.
+                self.ref_allele = groups.get('ref', '')
+                self.alt_allele = groups.get('alt', '')
+
+                # Convert numerical alleles.
+                if self.ref_allele.isdigit():
+                    self.ref_allele = "N" * int(self.ref_allele)
+                if self.alt_allele.isdigit():
+                    self.alt_allele = "N" * int(self.alt_allele)
+
+                # Convert duplication alleles.
+                if self.mutation_type == "dup":
+                    self.alt_allele = self.ref_allele * 2
+
+                # Convert no match alleles.
+                if self.mutation_type == "=":
+                    self.alt_allele = self.ref_allele
+                return
+
+        raise InvalidHGVSName(details, 'genomic allele')
+
+    def _validate(self):
+        """
+        Check for internal inconsistencies in representation
+        """
+        if self.start > self.end:
+            raise InvalidHGVSName(reason="Coordinates are nonincreasing")
+
+    def __repr__(self):
+        try:
+            return "HGVSName('%s')" % self.format()
+        except NotImplementedError:
+            return "HGVSName('%s')" % self.name
+
+    def __unicode__(self):
+        return self.format()
+
+    def format(self, use_prefix=True, use_gene=True, use_counsyl=False):
+        """Generate a HGVS name as a string."""
+
+        if self.kind in ('c', 'n'):
+            allele = self.kind + '.' + self.format_cdna()
+        elif self.kind == 'p':
+            allele = 'p.' + self.format_protein()
+        elif self.kind in ('g', 'm'):
+            allele = self.kind + '.' + self.format_genome()
+        else:
+            raise NotImplementedError("not implemented: '%s'" % self.kind)
+
+        prefix = self.format_prefix(use_gene=use_gene) if use_prefix else ''
+
+        if prefix:
+            return prefix + ':' + allele
+        else:
+            return allele
+
+    def format_prefix(self, use_gene=True):
+        """
+        Generate HGVS trancript/gene prefix.
+
+        Some examples of full hgvs names with transcript include:
+          NM_007294.3:c.2207A>C
+          NM_007294.3(BRCA1):c.2207A>C
+        """
+
+        if self.kind in ('g', 'm'):
+            if self.chrom:
+                return self.chrom
+
+        if self.transcript:
+            if use_gene and self.gene:
+                return '%s(%s)' % (self.transcript, self.gene)
+            else:
+                return self.transcript
+        else:
+            if use_gene:
+                return self.gene
+            else:
+                return ''
+
+    def format_cdna_coords(self):
+        """
+        Generate HGVS cDNA coordinates string.
+        """
+        # Format coordinates.
+        if self.cdna_start == self.cdna_end:
+            return str(self.cdna_start)
+        else:
+            return "%s_%s" % (self.cdna_start, self.cdna_end)
+
+    def format_dna_allele(self):
+        """
+        Generate HGVS DNA allele.
+        """
+        if self.mutation_type == '=':
+            # No change.
+            # example: 101A=
+            return self.ref_allele + '='
+
+        if self.mutation_type == '>':
+            # SNP.
+            # example: 101A>C
+            return self.ref_allele + '>' + self.alt_allele
+
+        elif self.mutation_type == 'delins':
+            # Indel.
+            # example: 112_117delAGGTCAinsTG, 112_117delinsTG
+            return 'del' + self.ref_allele + 'ins' + self.alt_allele
+
+        elif self.mutation_type in ('del', 'dup'):
+            # Delete, duplication.
+            # example: 1000_1003delATG, 1000_1003dupATG
+            return self.mutation_type + self.ref_allele
+
+        elif self.mutation_type == 'ins':
+            # Insert.
+            # example: 1000_1001insATG
+            return self.mutation_type + self.alt_allele
+
+        elif self.mutation_type == 'inv':
+            return self.mutation_type
+
+        else:
+            raise AssertionError(
+                "unknown mutation type: '%s'" % self.mutation_type)
+
+    def format_cdna(self):
+        """
+        Generate HGVS cDNA allele.
+
+        Some examples include:
+          Substitution: 101A>C,
+          Indel: 3428delCinsTA, 1000_1003delATG, 1000_1001insATG
+        """
+        return self.format_cdna_coords() + self.format_dna_allele()
+
+    def format_protein(self):
+        """
+        Generate HGVS protein name.
+
+        Some examples include:
+          No change: Glu1161=
+          Change: Glu1161Ser
+          Frameshift: Glu1161_Ser1164?fs
+        """
+        if (self.start == self.end and
+                self.ref_allele == self.ref2_allele == self.alt_allele):
+            # Match.
+            # Example: Glu1161=
+            pep_extra = self.pep_extra if self.pep_extra else '='
+            return self.ref_allele + str(self.start) + pep_extra
+
+        elif (self.start == self.end and
+              self.ref_allele == self.ref2_allele and
+              self.ref_allele != self.alt_allele):
+            # Change.
+            # Example: Glu1161Ser
+            return (self.ref_allele + str(self.start) +
+                    self.alt_allele + self.pep_extra)
+
+        elif self.start != self.end:
+            # Range change.
+            # Example: Glu1161_Ser1164?fs
+            return (self.ref_allele + str(self.start) + '_' +
+                    self.ref2_allele + str(self.end) +
+                    self.pep_extra)
+
+        else:
+            raise NotImplementedError('protein name formatting.')
+
+    def format_coords(self):
+        """
+        Generate HGVS cDNA coordinates string.
+        """
+        # Format coordinates.
+        if self.start == self.end:
+            return str(self.start)
+        else:
+            return "%s_%s" % (self.start, self.end)
+
+    def format_genome(self):
+        """
+        Generate HGVS genomic allele.
+
+        Som examples include:
+          Substitution: 1000100A>T
+          Indel: 1000100_1000102delATG
+        """
+        return self.format_coords() + self.format_dna_allele()
+
+    def get_raw_coords(self, transcript=None):
+        """ return genomic coordinates """
+        if self.kind in ('c', 'n'):
+            chrom = transcript.tx_position.chrom
+            start = transcript.cdna_to_genomic_coord(self.cdna_start)
+            end = transcript.cdna_to_genomic_coord(self.cdna_end)
+
+            if not transcript.tx_position.is_forward_strand:
+                start, end = end, start
+
+            if start > end:
+                raise AssertionError(
+                    "cdna_start cannot be greater than cdna_end")
+        elif self.kind in ('g', 'm'):
+            chrom = self.chrom
+            start = self.start
+            end = self.end
+        else:
+            raise NotImplementedError(
+                'Coordinates are not available for this kind of HGVS name "%s"'
+                % self.kind)
+
+        # Check coordinate span is equal to reference bases
+        if self.ref_allele:
+            coordinate_span = end - start + 1  # Ref will always be >=1 base
+            ref_length = len(self.ref_allele)
+            if coordinate_span != ref_length:
+                raise InvalidHGVSName("Coordinate span (%d) not equal to ref length %d" % (coordinate_span, ref_length))
+
+        return chrom, start, end
+
+    def get_ref_coords(self, transcript=None):
+        """Return genomic coordinates of reference allele."""
+
+        chrom, start, end = self.get_raw_coords(transcript)
+
+        if self.mutation_type == "ins":
+            # Inserts have empty interval.
+            if start < end:
+                start += 1
+                end -= 1
+            else:
+                end = start - 1
+
+        elif self.mutation_type == "dup":
+            end = start - 1
+        return chrom, start, end
+
+    def get_vcf_coords(self, transcript=None):
+        """Return genomic coordinates of reference allele in VCF-style."""
+        chrom, start, end = self.get_ref_coords(transcript)
+
+        # Inserts and deletes require left-padding by 1 base
+        if self.mutation_type in ("=", ">"):
+            pass
+        elif self.mutation_type in ("del", "ins", "dup", "delins"):
+            # Indels have left-padding.
+            start -= 1
+        else:
+            raise NotImplementedError("Unknown mutation_type '%s'" %
+                                      self.mutation_type)
+        return chrom, start, end
+
+    def get_ref_alt(self, is_forward_strand=True, raw_dup_alleles=False):
+        """ Return reference and alternate alleles.
+            Original code was for representation - ie it altered dup to look like an insert
+            pass raw_dup_alleles=True to get the raw values """
+        if self.kind == 'p':
+            raise NotImplementedError(
+                'get_ref_alt is not implemented for protein HGVS names')
+        alleles = [self.ref_allele, self.alt_allele]
+
+        # Represent duplications are inserts.
+        if not raw_dup_alleles and self.mutation_type == "dup":
+            alleles[0] = ""
+            alleles[1] = alleles[1][:len(alleles[1]) // 2]
+
+        if is_forward_strand:
+            return alleles
+        else:
+            return tuple(map(revcomp, alleles))
diff --git a/pyhgvs/models/transcript.py b/pyhgvs/models/transcript.py
new file mode 100644
index 0000000..7d93233
--- /dev/null
+++ b/pyhgvs/models/transcript.py
@@ -0,0 +1,372 @@
+"""
+Models for representing genomic elements.
+"""
+
+from __future__ import unicode_literals
+
+from collections import namedtuple
+
+from lazy import lazy
+
+from pyhgvs.models.cdna import CDNA_START_CODON, CDNA_STOP_CODON, CDNACoord
+
+
+class Gene(object):
+    def __init__(self, name):
+        self.name = name
+
+
+class Transcript(object):
+    """
+    A gene may have multiple transcripts with different combinations of exons.
+
+    We need both exons and cdna_match as need to know exact exon boundaries to work out flanking
+    """
+
+    def __init__(self, name, version, gene, tx_position, cds_position,
+                 is_default=False, cdna_match=None,
+                 start_codon_transcript_pos=None, stop_codon_transcript_pos=None):
+        self.name = name
+        self.version = version
+        self.gene = Gene(gene)
+        self.tx_position = tx_position
+        self.cds_position = cds_position
+        self.is_default = is_default
+        self.cdna_match = cdna_match or []
+        # Optional pre-calculated transcript coordinates
+        self._start_codon_transcript_pos = start_codon_transcript_pos
+        self._stop_codon_transcript_pos = stop_codon_transcript_pos
+
+    @property
+    def full_name(self):
+        if self.version is not None:
+            return '%s.%d' % (self.name, self.version)
+        else:
+            return self.name
+
+    @property
+    def is_coding(self):
+        # Coding transcripts have CDS with non-zero length.
+        return (self.cds_position.chrom_stop -
+                self.cds_position.chrom_start > 0)
+
+    @property
+    def strand(self):
+        return ('+' if self.tx_position.is_forward_strand else '-')
+
+    @lazy
+    def ordered_cdna_match(self):
+        """ Coding order """
+        transcript_strand = self.tx_position.is_forward_strand
+        cdna_match = list(self.cdna_match)
+        cdna_match.sort(key=lambda cm: cm.tx_position.chrom_start)
+        if not transcript_strand:
+            cdna_match.reverse()
+        return cdna_match
+
+    def get_cds_start_stop(self):
+        (start, stop) = (self.cds_position.chrom_start, self.cds_position.chrom_stop)
+        if not self.tx_position.is_forward_strand:
+            (stop, start) = (start, stop)
+        return start, stop
+
+    @lazy
+    def start_codon(self):
+        """ Transcript position """
+
+        if self._start_codon_transcript_pos is not None:
+            return self._start_codon_transcript_pos
+
+        start_genomic_coordinate, _ = self.get_cds_start_stop()
+        return self._get_transcript_position(start_genomic_coordinate)
+
+    @lazy
+    def stop_codon(self):
+        """ Transcript position """
+        if self._stop_codon_transcript_pos is not None:
+            return self._stop_codon_transcript_pos
+
+        _, stop_genomic_coordinate = self.get_cds_start_stop()
+        return self._get_transcript_position(stop_genomic_coordinate)
+
+    def _get_transcript_position(self, genomic_coordinate, label=None):
+        transcript_strand = self.tx_position.is_forward_strand
+
+        cdna_offset = 0
+        for cdna_match in self.ordered_cdna_match:
+            start = cdna_match.tx_position.chrom_start
+            stop = cdna_match.tx_position.chrom_stop
+
+            if start <= genomic_coordinate <= stop:
+                # We're inside this match
+                if transcript_strand:
+                    position = genomic_coordinate - start
+                else:
+                    position = stop - genomic_coordinate
+                return cdna_offset + position + cdna_match.get_offset(position)
+            else:
+                cdna_offset += cdna_match.length
+        if label is None:
+            label = "Genomic coordinate: %d" % genomic_coordinate
+        raise ValueError('%s is not in any of the exons' % label)
+
+    def cdna_to_genomic_coord(self, coord):
+        """Convert a HGVS cDNA coordinate to a genomic coordinate."""
+        transcript_strand = self.tx_position.is_forward_strand
+
+        # compute starting position along spliced transcript.
+        if coord.landmark == CDNA_START_CODON:
+            utr5p = (self.start_codon if self.is_coding else 0)
+
+            if coord.coord > 0:
+                cdna_pos = utr5p + coord.coord
+            else:
+                cdna_pos = utr5p + coord.coord + 1
+        elif coord.landmark == CDNA_STOP_CODON:
+            if coord.coord < 0:
+                raise ValueError('CDNACoord cannot have a negative coord and '
+                                 'landmark CDNA_STOP_CODON')
+            cdna_pos = self.stop_codon + coord.coord
+        else:
+            raise ValueError('unknown CDNACoord landmark "%s"' % coord.landmark)
+
+        # 5' flanking sequence (no need to account for gaps)
+        if cdna_pos < 1:
+            if transcript_strand:
+                return self.tx_position.chrom_start + cdna_pos
+            else:
+                return self.tx_position.chrom_stop - cdna_pos + 1
+
+        # Walk along transcript until we find an exon that contains cdna_pos.
+        for cdna_match in self.ordered_cdna_match:
+            if cdna_match.cdna_start <= cdna_pos <= cdna_match.cdna_end:
+                match_pos = cdna_pos - cdna_match.cdna_start
+                match_pos -= cdna_match.get_offset(match_pos)
+                # Compute genomic coordinate using offset.
+                if transcript_strand:
+                    # Plus strand.
+                    start = cdna_match.tx_position.chrom_start + 1
+                    return start + match_pos + coord.offset
+                else:
+                    # Minus strand.
+                    end = cdna_match.tx_position.chrom_stop
+                    return end - match_pos - coord.offset
+        else:
+            # 3' flanking sequence (no need to account for gaps)
+            if transcript_strand:
+                return self.cds_position.chrom_stop + coord.coord
+            else:
+                return self.cds_position.chrom_start + 1 - coord.coord
+
+    def genomic_to_cdna_coord(self, genomic_coord):
+        """ Convert a genomic coordinate to a cDNA coordinate and offset. """
+        exons = [exon.get_as_interval()
+                 for exon in self.ordered_cdna_match]
+
+        if len(exons) == 0:
+            return None
+
+        strand = self.strand
+        distances = [exon.distance(genomic_coord)
+                     for exon in exons]
+        min_distance_to_exon = min(map(abs, distances))
+        if min_distance_to_exon:
+            # We're outside of exon - so need to find closest point
+            for exon in exons:
+                distance = exon.distance(genomic_coord)
+                if abs(distance) == min_distance_to_exon:
+
+                    # Outside the exon.
+                    if distance > 0:
+                        genomic_nearest_exon = exon.chrom_start + 1
+                    else:
+                        genomic_nearest_exon = exon.chrom_end
+
+                    if strand == "+":
+                        distance *= -1
+
+                    nearest_exon_coord = self._exon_genomic_to_cdna_coord(genomic_nearest_exon)
+
+                    # If outside transcript, don't use offset.
+                    if (genomic_coord < self.tx_position.chrom_start + 1 or
+                            genomic_coord > self.tx_position.chrom_stop):
+                        nearest_exon_coord += distance
+                        distance = 0
+                    cdna_coord = CDNACoord(nearest_exon_coord, distance)
+                    break
+            else:
+                raise ValueError("Could not find closest exon!")  # Should never happen
+        else:
+            coord = self._exon_genomic_to_cdna_coord(genomic_coord)
+            cdna_coord = CDNACoord(coord, 0)
+
+        # Adjust coordinates for coding transcript.
+        if self.is_coding:
+            # Detect if position before start codon.
+            utr5p = self.start_codon
+            cdna_coord.coord -= utr5p
+            if cdna_coord.coord <= 0:
+                cdna_coord.coord -= 1
+            else:
+                # Detect if position is after stop_codon.
+                stop_codon = self.stop_codon
+                stop_codon -= utr5p
+                if (cdna_coord.coord > stop_codon or
+                        cdna_coord.coord == stop_codon and cdna_coord.offset > 0):
+                    cdna_coord.coord -= stop_codon
+                    cdna_coord.landmark = CDNA_STOP_CODON
+
+        return cdna_coord
+
+    def _exon_genomic_to_cdna_coord(self, genomic_coord):
+        cdna_offset = 0
+        for cdna_match in self.ordered_cdna_match:
+            # Inside the exon.
+            if cdna_match.tx_position.chrom_start <= genomic_coord <= cdna_match.tx_position.chrom_stop:
+                if self.strand == "+":
+                    position = genomic_coord - (cdna_match.tx_position.chrom_start + 1)
+                else:
+                    position = cdna_match.tx_position.chrom_stop - genomic_coord
+                offset = cdna_match.get_offset(position, validate=False)
+                return cdna_offset + position + offset + 1
+            cdna_offset += cdna_match.length
+
+        raise ValueError(f"Couldn't find {genomic_coord=}!")
+
+
+BED6Interval_base = namedtuple(
+    "BED6Interval_base", (
+        "chrom",
+        "chrom_start",
+        "chrom_end",
+        "name",
+        "score",
+        "strand"))
+
+
+class BED6Interval(BED6Interval_base):
+    def distance(self, offset):
+        """Return the distance to the interval.
+
+        if offset is inside the exon, distance is zero.
+        otherwise, distance is the distance to the nearest edge.
+
+        distance is positive if the exon comes after the offset.
+        distance is negative if the exon comes before the offset.
+        """
+
+        start = self.chrom_start + 1
+        end = self.chrom_end
+
+        if start <= offset <= end:
+            return 0
+
+        start_distance = start - offset
+        end_distance = offset - end
+
+        if abs(start_distance) < abs(end_distance):
+            return start_distance
+        else:
+            return -end_distance
+
+
+class Exon(object):
+    """ We still need exons to work out the flanking boundaries """
+    def __init__(self, transcript, tx_position, number):
+        self.transcript = transcript
+        self.tx_position = tx_position
+        self.number = number
+
+    @property
+    def name(self):
+        return "%s.%d" % (self.transcript.name, self.number)
+
+    def get_as_interval(self, coding_only=False):
+        """Returns the coding region for this exon as a BED6Interval.
+
+        This function returns a BED6Interval objects containing  position
+        information for this exon. This may be used as input for
+        pybedtools.create_interval_from_list() after casting chrom_start
+        and chrom_end as strings.
+
+        coding_only: only include exons in the coding region
+
+        """
+
+        exon_start = self.tx_position.chrom_start
+        exon_stop = self.tx_position.chrom_stop
+
+        # Get only exon coding region if requested
+        if coding_only:
+            if (exon_stop <= self.transcript.cds_position.chrom_start or
+                    exon_start >= self.transcript.cds_position.chrom_stop):
+                return None
+            exon_start = max(exon_start,
+                             self.transcript.cds_position.chrom_start)
+            exon_stop = min(
+                max(exon_stop, self.transcript.cds_position.chrom_start),
+                self.transcript.cds_position.chrom_stop)
+
+        return BED6Interval(
+            self.tx_position.chrom,
+            exon_start,
+            exon_stop,
+            self.name,
+            '.',
+            self.strand,
+        )
+
+    @property
+    def strand(self):
+        strand = '+' if self.tx_position.is_forward_strand else '-'
+        return strand
+
+
+class CDNA_Match(Exon):
+    def __init__(self, transcript, tx_position, cdna_start, cdna_end, gap, number):
+        super(CDNA_Match, self).__init__(transcript, tx_position, number)
+        self.cdna_start = cdna_start
+        self.cdna_end = cdna_end
+        self.gap = gap
+
+    @property
+    def length(self):
+        return self.cdna_end - self.cdna_start + 1
+
+    def get_offset(self, position: int, validate=True):
+        """ cdna_match GAP attribute looks like: 'M185 I3 M250' which is code/length
+            @see https://github.com/The-Sequence-Ontology/Specifications/blob/master/gff3.md#the-gap-attribute
+            codes operation
+            M 	match
+            I 	insert a gap into the reference sequence
+            D 	insert a gap into the target (delete from reference)
+
+            If you want the whole exon, then pass the end
+        """
+
+        if not self.gap:
+            return 0
+
+        position_1_based = position + 1
+        cdna_match_index = 1
+        offset = 0
+        for gap_op in self.gap.split():
+            code = gap_op[0]
+            length = int(gap_op[1:])
+            if code == "M":
+                cdna_match_index += length
+            elif code == "I":
+                if validate and position_1_based < cdna_match_index + length:
+                    raise ValueError("Coordinate (%d) inside insertion (%s) - no mapping possible!" % (position_1_based, gap_op))
+                offset += length
+            elif code == "D":
+                if validate and position < cdna_match_index + length:
+                    raise ValueError("Coordinate (%d) inside deletion (%s) - no mapping possible!" % (position_1_based, gap_op))
+                offset -= length
+            else:
+                raise ValueError(f"Unknown code in cDNA GAP: {gap_op}")
+
+            if cdna_match_index > position_1_based:
+                break
+
+        return offset
diff --git a/pyhgvs/variants.py b/pyhgvs/models/variants.py
similarity index 90%
rename from pyhgvs/variants.py
rename to pyhgvs/models/variants.py
index fca0cdc..aced9fc 100644
--- a/pyhgvs/variants.py
+++ b/pyhgvs/models/variants.py
@@ -5,13 +5,24 @@
 from __future__ import absolute_import
 from __future__ import unicode_literals
 
-from .models import Position
-
-
 _COMP = dict(A='T', C='G', G='C', T='A', N='N',
              a='t', c='g', g='c', t='a', n='n')
 
 
+class Position(object):
+    """A position in the genome."""
+
+    def __init__(self, chrom, chrom_start, chrom_stop, is_forward_strand):
+        self.chrom = chrom
+        self.chrom_start = chrom_start
+        self.chrom_stop = chrom_stop
+        self.is_forward_strand = is_forward_strand
+
+    def __repr__(self):
+        return "<Position %s[%d:%d]>" % (
+            self.chrom, self.chrom_start, self.chrom_stop)
+
+
 def revcomp(seq):
     """Reverse complement."""
     return ''.join(_COMP[base] for base in reversed(seq))
@@ -115,7 +126,7 @@ def justify_genomic_indel(genome, chrom, start, end, indel, justify,
 
 
 def normalize_variant(chrom, offset, ref_sequence, alt_sequences, genome,
-                      justify='left', flank_length=30):
+                      justify='left', flank_length=30, indels_start_with_same_base=True):
     """
     Normalize variant according to the GATK/VCF standard.
 
@@ -133,7 +144,7 @@ def normalize_variant(chrom, offset, ref_sequence, alt_sequences, genome,
         chrom_stop=end,
         is_forward_strand=True)
     return NormalizedVariant(position, ref_sequence, alt_sequences,
-                             genome=genome, justify=justify)
+                             genome=genome, justify=justify, indels_start_with_same_base=indels_start_with_same_base)
 
 
 class NormalizedVariant(object):
@@ -142,7 +153,8 @@ class NormalizedVariant(object):
     """
 
     def __init__(self, position, ref_allele, alt_alleles,
-                 seq_5p='', seq_3p='', genome=None, justify='left'):
+                 seq_5p='', seq_3p='', genome=None, justify='left',
+                 indels_start_with_same_base=True):
         """
         position: a 0-index genomic Position.
         ref_allele: the reference allele sequence.
@@ -150,6 +162,9 @@ def __init__(self, position, ref_allele, alt_alleles,
         seq_5p: 5 prime flanking sequence of variant.
         seq_3p: 3 prime flanking sequence of variant.
         genome: a pygr compatible genome object (optional).
+
+        indels_start_with_same_base: DML - I have no idea why this is required
+                                           but am keeping for backwards compat
         """
         self.position = position
         self.alleles = [ref_allele] + list(alt_alleles)
@@ -157,6 +172,7 @@ def __init__(self, position, ref_allele, alt_alleles,
         self.seq_3p = seq_3p
         self.genome = genome
         self.log = []
+        self.indels_start_with_same_base = indels_start_with_same_base
 
         self._on_forward_strand()
         self._trim_common_prefix()
@@ -203,7 +219,7 @@ def _trim_common_suffix(self):
         """
         minlength = min(map(len, self.alleles))
         common_suffix = 0
-        for i in range(1, minlength+1):
+        for i in range(1, minlength + 1):
             if len(set(allele[-i] for allele in self.alleles)) > 1:
                 # Not all alleles match at this site, so common suffix ends.
                 break
@@ -275,7 +291,7 @@ def _1bp_pad(self):
         # Pad sequences with one 5-prime base before the mutation event.
         empty_seq = any(not allele for allele in self.alleles)
         uniq_starts = set(allele[0] for allele in self.alleles if allele)
-        if empty_seq or len(uniq_starts) > 1:
+        if empty_seq or (self.indels_start_with_same_base and len(uniq_starts) > 1):
             # Fetch more 5p flanking sequence if needed.
             if self.genome and self.seq_5p == '':
                 start = self.position.chrom_start
@@ -299,9 +315,10 @@ def _1bp_pad(self):
                 self.seq_3p = self.seq_3p[1:]
                 self.position.chrom_stop += 1
 
-        if len(set(a[0] for a in self.alleles)) != 1:
-            raise AssertionError(
-                "All INDEL alleles should start with same base.")
+        if self.indels_start_with_same_base:
+            if len(set(a[0] for a in self.alleles)) != 1:
+                raise AssertionError(
+                    "All INDEL alleles should start with same base.")
 
     def _set_1based_position(self):
         """
diff --git a/pyhgvs/tests/data/test_name_to_variant.genome b/pyhgvs/tests/data/test_name_to_variant.genome
index 90eb59e..a36abfb 100644
--- a/pyhgvs/tests/data/test_name_to_variant.genome
+++ b/pyhgvs/tests/data/test_name_to_variant.genome
@@ -61,6 +61,7 @@ chr1	76205650	76205691	TTTATATATTCAAGGCTTATTGTGTAACAGAACCTGGAGCA
 chr1	76205639	76205669	TTTCTCTTGTTTTTATATATTCAAGGCTTA
 chr1	76205670	76205700	TGTGTAACAGAACCTGGAGCAGGCTCTGAT
 chr1	76227048	76227049	C
+chr1	76227049	76227050	T
 chr1	76227029	76227070	TAATGAGGGATGCCAAAATCTATCAGGTAAGGTTAAAGATG
 chr1	76227019	76227049	GTAGAAAAACTAATGAGGGATGCCAAAATC
 chr1	76227049	76227079	TATCAGGTAAGGTTAAAGATGATTTTTTTG
@@ -84,7 +85,13 @@ chr7	117176661	117176664	TAT
 chr7	117176642	117176684	CCTCAGAAATGATTGAAAATATCCAATCTGTTAAGGCATACT
 chr7	117176630	117176660	GACTTGTGATTACCTCAGAAATGATTGAAA
 chr7	117176662	117176692	ATCCAATCTGTTAAGGCATACTGCTGGGAA
+chr7	117182084	117182126	CAAGAATATAAGACATTGGAATATAACTTAACGACTACAGAA
+chr7	117182103	117182104	A
+chr7	117182103	117182106	AAT
+chr7	117182106	117182109	ATA
 chr7	117182106	117182107	A
+chr7	117182107	117182109	TA
+chr7	117182077	117182107	TACAAAAGCAAGAATATAAGACATTGGAATA
 chr7	117182087	117182129	GAATATAAGACATTGGAATATAACTTAACGACTACAGAAGTA
 chr7	117182074	117182104	CTTACAAAAGCAAGAATATAAGACATTGGA
 chr7	117182104	117182134	ATATAACTTAACGACTACAGAAGTAGTGAT
@@ -98,7 +105,11 @@ chr17	41245340	41245341	T
 chr17	41245340	41245341	T
 chr7	117307164	117307165	A
 chr11	17496507	17496508	T
+chr17	7125998	7126028	AAGAAGATGGGCATCAAGGCTTCAAACACA
+chr17	7126009	7126051	CATCAAGGCTTCAAACACAGCAGAGGTGTTCTTTGATGGAGT
 chr17	7126027	7126037	AGCAGAGGTG
+chr17	7126028	7126029	A
+chr17	7126028	7126058	GCAGAGGTGTTCTTTGATGGAGTACGGGTG
 chr1	76216233	76216235	AA
 chr1	76199213	76199220	AGGTCTT
 chr1	76199194	76199240	AACATTTTGATACTGTAGGAGGTCTTGGACTTGGAACTTTTGATGC
diff --git a/pyhgvs/tests/data/test_variant_to_name.genome b/pyhgvs/tests/data/test_variant_to_name.genome
index f310c9c..1f0b257 100644
--- a/pyhgvs/tests/data/test_variant_to_name.genome
+++ b/pyhgvs/tests/data/test_variant_to_name.genome
@@ -66,3 +66,9 @@ chr7	117188682	117188712	tTTTTTTAACAGGGATTTGGGGAATTATTT
 chr7	117188582	117188783	CCATGTGCTTTTCAAACTAATTGTACATAAAACAAGCATCTATTGAAAATATCTGACAAACTCATCTTTTATTTTTGAtgtgtgtgtgtgtgtgtgtgtgtTTTTTTAACAGGGATTTGGGGAATTATTTGAGAAAGCAAAACAAAACAATAACAATAGAAAAACTTCTAATGGTGATGACAGCCTCTTCTTCAGTAATTT
 chr7	117188687	117188689	TT
 chr7	117188689	117188691	AA
+chr17	7125928	7126129	TCCCCCAGTGACAACCTGTTGAACACACCTCTGCTTTCCCACACTGCCCTGACACAGTGGGCCCCCTGAGAAGAAGATGGGCATCAAGGCTTCAAACACAGCAGAGGTGTTCTTTGATGGAGTACGGGTGCCATCGGAGAACGTGCTGGGTGAGGTTGGGAGTGGCTTCAAGGTTGCCATGCACATCCTCAACAATGGAAG
+chr17	7126008	7126050	GCATCAAGGCTTCAAACACAGCAGAGGTGTTCTTTGATGGAG
+chr17	7126027	7126029	AG
+chr17	7126028	7126058	GCAGAGGTGTTCTTTGATGGAGTACGGGTG
+chr17	7126029	7126031	CA
+chr17	7125998	7126028	AAGAAGATGGGCATCAAGGCTTCAAACACA
\ No newline at end of file
diff --git a/pyhgvs/tests/genome.py b/pyhgvs/tests/genome.py
index 5c53da6..ecc35ae 100644
--- a/pyhgvs/tests/genome.py
+++ b/pyhgvs/tests/genome.py
@@ -4,7 +4,7 @@
 import itertools
 import os
 
-from ..variants import revcomp
+from ..models.variants import revcomp
 
 
 try:
@@ -77,9 +77,11 @@ def __init__(self, lookup=None, filename=None, db_filename=None,
             if SequenceFileDB is None:
                 raise ValueError('pygr is not available.')
             self._genome = SequenceFileDB(db_filename)
+            self._source_filename = db_filename
         elif filename:
             # Read genome sequence from lookup table.
             self.read(filename)
+            self._source_filename = filename
 
     def __contains__(self, chrom):
         """Return True if genome contains chromosome."""
@@ -113,8 +115,8 @@ def get_seq(self, chrom, start, end):
                         None, end - start))
                 else:
                     raise MockGenomeError(
-                        'Sequence not in test data: %s:%d-%d' %
-                        (chrom, start, end))
+                        'Sequence not in test data: %s:%d-%d source: %s' %
+                        (chrom, start, end, self._source_filename))
 
     def read(self, filename):
         """Read a sequence lookup table from a file.
diff --git a/pyhgvs/tests/test_cdna_match.py b/pyhgvs/tests/test_cdna_match.py
new file mode 100644
index 0000000..bbaf17f
--- /dev/null
+++ b/pyhgvs/tests/test_cdna_match.py
@@ -0,0 +1,161 @@
+"""
+    RefSeq transcripts don't always match the genome reference, so can align with gaps.
+"""
+
+import nose
+from pyhgvs import HGVSName
+
+from pyhgvs.utils import make_transcript
+
+_HGVS_NM_015120_GRCh37_COORDS = [
+    # Has Gap=M185 I3 M250 5' UTR length is 111 so last match is c.74 then 3 bases insert
+    ("NM_015120.4:c.74A>T", 73613070),
+    ("NM_015120.4:c.75G>T", None),  # No consistent alignment
+    ("NM_015120.4:c.76G>T", None),  # No consistent alignment
+    ("NM_015120.4:c.77A>T", None),  # No consistent alignment
+    ("NM_015120.4:c.78A>T", 73613071),
+    ("NM_015120.4:c.79G>C", 73613072),
+]
+
+_HGVS_NM_001135649_GRCh37_COORDS = [
+    # Has Gap=M460 I1 M337 5' UTR length is 158 so last match is c.302 then 1 base insert
+    ("NM_001135649.3:c.300C>T", 88751754),
+    ("NM_001135649.3:c.301A>C", 88751753),
+    ("NM_001135649.3:c.302C>T", 88751752),
+    ("NM_001135649.3:c.303C>T", None),  # N/A GRCh37 - 1 bp insert
+    ("NM_001135649.3:c.304T>C", 88751751),
+    ("NM_001135649.3:c.305T>C", 88751750),
+]
+
+_HGVS_NM_001012755_GRCh37_COORDS = [
+    # 133, 6207, "M1574 I1 M4500 - 5'UTR length is 163 so last match is transcript pos (1574+132) ie c.1543 then 1bp ins
+    ("NM_001012755.5:c.*619G>C", 103348398),  # NM_001012755.5:c.1543G>C
+    ("NM_001012755.5:c.*620G>C", None),       # NM_001012755.5:c.1544G>C - No GRCh37
+    ("NM_001012755.5:c.*621C>G", 103348397),  # NM_001012755.5:c.1545G>C
+]
+
+
+def test_cdna_to_genomic_coord():
+    transcript = get_transcript("NM_015120.4")
+    for hgvs_str, expected_genomic_coord in _HGVS_NM_015120_GRCh37_COORDS:
+        hgvs_name = HGVSName(hgvs_str)
+        if expected_genomic_coord:
+            genomic_coord = transcript.cdna_to_genomic_coord(hgvs_name.cdna_start)
+            nose.tools.assert_equal(genomic_coord, expected_genomic_coord)
+        else:
+            nose.tools.assert_raises(ValueError, transcript.cdna_to_genomic_coord, hgvs_name.cdna_start)
+
+
+def test_genomic_to_cdna_coord():
+    transcript = get_transcript("NM_015120.4")
+    for hgvs_str, genomic_coord in _HGVS_NM_015120_GRCh37_COORDS:
+        hgvs_name = HGVSName(hgvs_str)
+        if genomic_coord:
+            cdna_coord = transcript.genomic_to_cdna_coord(genomic_coord)
+            nose.tools.assert_equal(cdna_coord, hgvs_name.cdna_start)
+
+
+def test_cdna_to_genomic_coord_negative_strand():
+    transcript = get_transcript("NM_001135649.3")
+    for hgvs_str, expected_genomic_coord in _HGVS_NM_001135649_GRCh37_COORDS:
+        hgvs_name = HGVSName(hgvs_str)
+        if expected_genomic_coord:
+            genomic_coord = transcript.cdna_to_genomic_coord(hgvs_name.cdna_start)
+            nose.tools.assert_equal(genomic_coord, expected_genomic_coord)
+        else:
+            nose.tools.assert_raises(ValueError, transcript.cdna_to_genomic_coord, hgvs_name.cdna_start)
+
+
+def test_cdna_to_genomic_coord_negative_strand2():
+    transcript = get_transcript("NM_001012755.5")
+    for hgvs_str, expected_genomic_coord in _HGVS_NM_001012755_GRCh37_COORDS:
+        hgvs_name = HGVSName(hgvs_str)
+        if expected_genomic_coord:
+            genomic_coord = transcript.cdna_to_genomic_coord(hgvs_name.cdna_start)
+            nose.tools.assert_equal(genomic_coord, expected_genomic_coord)
+        else:
+            nose.tools.assert_raises(ValueError, transcript.cdna_to_genomic_coord, hgvs_name.cdna_start)
+
+
+def test_genomic_to_cdna_coord_negative_strand():
+    transcript = get_transcript("NM_001135649.3")
+    for hgvs_str, genomic_coord in _HGVS_NM_001135649_GRCh37_COORDS:
+        hgvs_name = HGVSName(hgvs_str)
+        if genomic_coord:
+            cdna_coord = transcript.genomic_to_cdna_coord(genomic_coord)
+            nose.tools.assert_equal(cdna_coord, hgvs_name.cdna_start)
+
+
+def test_genomic_to_cdna_coord_negative_strand2():
+    transcript = get_transcript("NM_001012755.5")
+
+    for hgvs_str, genomic_coord in _HGVS_NM_001012755_GRCh37_COORDS:
+        hgvs_name = HGVSName(hgvs_str)
+        if genomic_coord:
+            cdna_coord = transcript.genomic_to_cdna_coord(genomic_coord)
+            nose.tools.assert_equal(cdna_coord, hgvs_name.cdna_start)
+
+
+def get_transcript(accession):
+    transcript_json = _transcripts[accession]
+    return make_transcript(transcript_json)
+
+
+_transcripts = {
+    # GRCh37
+    "NM_015120.4": {
+        "id": "NM_015120.4",
+        "gene_name": "ALMS1",
+        "end": 73837046,
+        "chrom": "NC_000002.11",
+        "exons": [[73612885, 73613320], [73635749, 73635875], [73646250, 73646446], [73649984, 73650102],
+                  [73651557, 73652030], [73653580, 73653681], [73659325, 73659419], [73675089, 73681194],
+                  [73682288, 73682422], [73716760, 73718625], [73746901, 73747143], [73761950, 73762076],
+                  [73777393, 73777564], [73784346, 73784481], [73786098, 73786269], [73799388, 73800551],
+                  [73826527, 73826648], [73827804, 73828008], [73828321, 73828563], [73829311, 73829495],
+                  [73830367, 73830431], [73835601, 73835701], [73836694, 73837046]],
+        "start": 73612885,
+        "strand": "+",
+        "cds_end": 73836739,
+        "cds_start": 73612996,
+        "cdna_match": [[73612885, 73613320, 1, 438, "M185 I3 M250"], [73635749, 73635875, 439, 564, None],
+                       [73646250, 73646446, 565, 760, None], [73649984, 73650102, 761, 878, None],
+                       [73651557, 73652030, 879, 1351, None], [73653580, 73653681, 1352, 1452, None],
+                       [73659325, 73659419, 1453, 1546, None], [73675089, 73681194, 1547, 7654, "M141 I3 M5964"],
+                       [73682288, 73682422, 7655, 7788, None], [73716760, 73718625, 7789, 9653, None],
+                       [73746901, 73747143, 9654, 9895, None], [73761950, 73762076, 9896, 10021, None],
+                       [73777393, 73777564, 10022, 10192, None], [73784346, 73784481, 10193, 10327, None],
+                       [73786098, 73786269, 10328, 10498, None], [73799388, 73800551, 10499, 11661, None],
+                       [73826527, 73826648, 11662, 11782, None], [73827804, 73828008, 11783, 11986, None],
+                       [73828321, 73828563, 11987, 12228, None], [73829311, 73829495, 12229, 12412, None],
+                       [73830367, 73830431, 12413, 12476, None], [73835601, 73835701, 12477, 12576, None],
+                       [73836694, 73837046, 12577, 12928, None]]
+    },
+    "NM_001135649.3": {
+        # This is a strange case - there are 3 exons, but 2 cDNA match entries
+        # I think the exons are in error, as eg where is the splice site?
+        # So - we'll just use the cDNA as exons
+        "id": "NM_001135649.3",
+        "gene_name": "FOXI3",
+        "end": 88752211,
+        "chrom": "NC_000002.11",
+        "exons": [[88746305, 88748348], [88751414, 88751751], [88751753, 88752211]],
+        "start": 88746305,
+        "strand": "-",
+        "cds_end": 88752053,
+        "cds_start": 88747725,
+        "cdna_match": [[88746305, 88748348, 799, 2841, None], [88751414, 88752211, 1, 798, "M460 I1 M337"]]
+    },
+    "NM_001012755.5": {
+        "id": "NM_001012755.5",
+        "gene_name": "SLC25A53",
+        "end": 103401690,
+        "chrom": "NC_000023.10",
+        "exons": [[103343897, 103349971], [103401558, 103401690]],
+        "start": 103343897,
+        "strand": "-",
+        "cds_end": 103349940,
+        "cds_start": 103349016,
+        "cdna_match": [[103343897, 103349971, 133, 6207, "M1574 I1 M4500"], [103401558, 103401690, 1, 132, None]]
+    }
+}
diff --git a/pyhgvs/tests/test_hgvs_names.py b/pyhgvs/tests/test_hgvs_names.py
index be294e0..579e2a6 100644
--- a/pyhgvs/tests/test_hgvs_names.py
+++ b/pyhgvs/tests/test_hgvs_names.py
@@ -9,13 +9,9 @@
 except ImportError:
     SequenceFileDB = None
 
-from .. import CDNACoord
-from .. import CDNA_STOP_CODON
-from .. import HGVSName
-from .. import InvalidHGVSName
-from .. import cdna_to_genomic_coord
-from .. import format_hgvs_name
-from .. import genomic_to_cdna_coord
+from ..models.cdna import CDNACoord, CDNA_STOP_CODON
+from ..models.hgvs_name import HGVSName, InvalidHGVSName
+from .. import format_hgvs_name, matches_ref_allele
 from .. import parse_hgvs_name
 from ..utils import read_transcripts
 from .genome import MockGenomeTestFile
@@ -50,7 +46,7 @@ def test_genomic_to_cdna_coord():
     """
     for transcript_name, genomic_coord, cdna_coord_expected in _convert_coords:
         transcript = get_transcript(transcript_name)
-        cdna_coord = genomic_to_cdna_coord(transcript, genomic_coord[1])
+        cdna_coord = transcript.genomic_to_cdna_coord(genomic_coord[1])
         nose.tools.assert_equal(
             cdna_coord, cdna_coord_expected,
             repr((cdna_coord, cdna_coord_expected,
@@ -63,7 +59,7 @@ def test_cdna_to_genomic_coord():
     """
     for transcript_name, genomic_coord_expected, cdna_coord in _convert_coords:
         transcript = get_transcript(transcript_name)
-        genomic_coord = cdna_to_genomic_coord(transcript, cdna_coord)
+        genomic_coord = transcript.cdna_to_genomic_coord(cdna_coord)
         nose.tools.assert_equal(
             genomic_coord, genomic_coord_expected[1],
             repr((genomic_coord, genomic_coord_expected[1],
@@ -123,13 +119,22 @@ def test_variant_to_name():
     for (expected_hgvs_name, variant,
          name_canonical, var_canonical) in _name_variants:
         if name_canonical:
-            transcript_name = HGVSName(expected_hgvs_name).transcript
-            transcript = get_transcript(transcript_name)
-            assert transcript, transcript_name
+            hgvs = HGVSName(expected_hgvs_name)
+            transcript_name = hgvs.transcript
+            transcript = None
+            if transcript_name:
+                transcript = get_transcript(transcript_name)
+                assert transcript, transcript_name
             chrom, offset, ref, alt = variant
             hgvs_name = format_hgvs_name(
                 chrom, offset, ref, alt, genome, transcript,
                 use_gene=False)
+
+            if hgvs.kind == 'g':
+                # Strip off g.HGVS prefix
+                i = expected_hgvs_name.find(":g.")
+                expected_hgvs_name = expected_hgvs_name[i+1:]
+
             nose.tools.assert_equal(
                 hgvs_name, expected_hgvs_name,
                 repr([hgvs_name, expected_hgvs_name, variant]))
@@ -192,6 +197,31 @@ def test_invalid_coordinates():
     parse_hgvs_name(hgvs_name, genome, get_transcript=get_transcript)
 
 
+def test_matches_ref():
+    genome = MockGenomeTestFile(
+        db_filename='hg19.fa',
+        filename='pyhgvs/tests/data/test_hgvs.genome',
+        create_data=False)
+    # genome = SequenceFileDB('pyhgvs/tests/data/test_hgvs.genome')
+    transcript = get_transcript("NM_000016.4")
+
+    # The base at this position is "T" - used to always work w/dups
+    expected = [
+        ("NM_000016.4:c.1189dupT", True),
+        ("NM_000016.4:c.1189T>TT", True),  # same as dup by both alleles provided
+        ("NM_000016.4:c.1189dupA", False),
+        ("NM_000016.4:c.1189A>AA", False),
+        ("NM_000016.4:c.1189delT", True),
+        ("NM_000016.4:c.1189delC", False),
+        ("NM_000016.4:c.1189delCinsT", False),
+        ("NM_000016.4:c.1189delTinsC", True),
+    ]
+    for hgvs_string, expected_result in expected:
+        hgvs_name = HGVSName(hgvs_string)
+        matches_ref = matches_ref_allele(hgvs_name, genome, transcript)
+        nose.tools.assert_equal(matches_ref, expected_result, hgvs_string + " matches reference base")
+
+
 # Test examples of cDNA coordinates.
 _parse_cdna_coords = [
     ('1001', CDNACoord(1001)),
@@ -206,7 +236,7 @@ def test_invalid_coordinates():
 ]
 
 
-# Test examples of coverting coordinates.
+# Test examples of converting coordinates.
 _convert_coords = [
     # Positions near start codon.
     ('NM_000016.4', ('chr1', 76190473), CDNACoord(1)),
@@ -353,6 +383,18 @@ def test_invalid_coordinates():
          'mutation_type': '=',
      }),
 
+    # cDNA no change, no provided base
+    ('BRCA1:c.101=', True,
+     {
+         'gene': 'BRCA1',
+         'kind': 'c',
+         'cdna_start': CDNACoord(101),
+         'cdna_end': CDNACoord(101),
+         'ref_allele': '',
+         'alt_allele': '',
+         'mutation_type': '=',
+     }),
+
     # cDNA 1bp mutations.
     ('BRCA1:c.101A>C', True,
      {
@@ -537,6 +579,18 @@ def test_invalid_coordinates():
          'mutation_type': '=',
      }),
 
+    # Genomic no change + no ref base
+    ('BRCA1:g.101=', True,
+     {
+         'gene': 'BRCA1',
+         'kind': 'g',
+         'start': 101,
+         'end': 101,
+         'ref_allele': '',
+         'alt_allele': '',
+         'mutation_type': '=',
+     }),
+
     # Genomic 1bp mutations.
     ('BRCA1:g.101A>C', True,
      {
@@ -652,6 +706,11 @@ def test_invalid_coordinates():
          'alt_allele': 'TA',
          'mutation_type': 'delins',
      }),
+    #
+    ('NC_000010.10:g.89623915_89623920=', False,
+     {
+         'chrom': 'NC_000010.10',
+     }),
 ]
 
 
@@ -723,7 +782,11 @@ def test_invalid_coordinates():
     ('NM_000016.4:c.387+1delG', ('chr1', 76199309, 'TG', 'T'), True, True),
     ('NM_000016.4:c.475delT', ('chr1', 76205669, 'AT', 'A'), True, True),
     ('NM_000016.4:c.1189dupT', ('chr1', 76227049, 'C', 'CT'), True, True),
+    # Same as above but without optional trailing base
+    ('NM_000016.4:c.1189dup', ('chr1', 76227049, 'C', 'CT'), False, True),
     ('NM_000016.4:c.1191delT', ('chr1', 76227051, 'AT', 'A'), True, True),
+    # Same as above but without optional trailing base
+    ('NM_000016.4:c.1191del', ('chr1', 76227051, 'AT', 'A'), False, True),
     ('NM_000016.4:c.306_307insG', ('chr1', 76199232, 'T', 'TG'), True, True),
 
     # Alignment tests for HGVS 3' and VCF left-alignment.
@@ -734,6 +797,9 @@ def test_invalid_coordinates():
      True, True),
     ('NM_000492.3:c.1155_1156dupTA', ('chr7', 117182104, 'A', 'AAT'),
      True, True),
+    # Same as above but without optional trailing base
+    ('NM_000492.3:c.1155_1156dup', ('chr7', 117182104, 'A', 'AAT'),
+     False, True),
     ('NM_000492.3:c.3889dupT', ('chr7', 117292905, 'A', 'AT'), True, True),
 
     # Transcript prefix.
@@ -750,6 +816,9 @@ def test_invalid_coordinates():
     ('chr11:g.17496508T>C', ('chr11', 17496508, 'T', 'C'), False, True),
 
     # Genomic indels.
+    ('chr17:g.7126029_7126030insTG',
+     ('chr17', 7126028, 'A', 'AGT'), True, True),
+
     ('chr17:g.7126029_7126037delGCAGAGGTGinsTCAAAGCAC',
      ('chr17', 7126028, 'AGCAGAGGTG', 'ATCAAAGCAC'), False, True),
 
@@ -757,6 +826,13 @@ def test_invalid_coordinates():
     ('chr1:g.76216235delAinsGC', ('chr1', 76216234, 'AA', 'AGC'),
      False, True),
 
+    # Genomic dup
+    ('chr7:g.117182108_117182109dup', ('chr7', 117182104, 'A', 'AAT'),
+     False, True),
+
+    ('chr7:g.117182105_117182106dupAT', ('chr7', 117182104, 'A', 'AAT'),
+     False, True),
+
     # Genomic delete region.
     ('chr1:g.76199215_76199220delGGTCTT',
      ('chr1', 76199214, 'AGGTCTT', 'A'), False, True),
diff --git a/pyhgvs/tests/test_hgvs_names_long.py b/pyhgvs/tests/test_hgvs_names_long.py
index 9a019a7..2bfdfec 100644
--- a/pyhgvs/tests/test_hgvs_names_long.py
+++ b/pyhgvs/tests/test_hgvs_names_long.py
@@ -2,7 +2,7 @@
 
 from .. import parse_hgvs_name
 from ..utils import read_transcripts
-from ..variants import normalize_variant
+from ..models.variants import normalize_variant
 from .genome import MockGenomeTestFile
 
 
diff --git a/pyhgvs/tests/test_variants.py b/pyhgvs/tests/test_variants.py
index de37a45..c4b9b35 100644
--- a/pyhgvs/tests/test_variants.py
+++ b/pyhgvs/tests/test_variants.py
@@ -3,7 +3,7 @@
 
 from unittest import TestCase
 
-from ..variants import normalize_variant
+from ..models.variants import normalize_variant
 from .genome import MockGenomeTestFile
 
 
diff --git a/pyhgvs/utils.py b/pyhgvs/utils.py
index 83e239e..3fcc320 100644
--- a/pyhgvs/utils.py
+++ b/pyhgvs/utils.py
@@ -5,63 +5,58 @@
 from __future__ import absolute_import
 from __future__ import unicode_literals
 
-from .models import Exon
-from .models import Position
-from .models import Transcript
+import operator
+from .models.variants import Position
+from .models.transcript import Transcript, CDNA_Match, Exon
 
 
 def read_refgene(infile):
-    """
-    Iterate through a refGene file.
+    """ refGene = genePred with extra column at front (and ignored ones after) """
+    return read_genepred(infile, skip_first_column=True)
+
 
+def read_genepred(infile, skip_first_column=False):
+    """
     GenePred extension format:
     http://genome.ucsc.edu/FAQ/FAQformat.html#GenePredExt
 
     Column definitions:
-    0. uint undocumented id
-    1. string name;             "Name of gene (usually transcript_id from GTF)"
-    2. string chrom;                "Chromosome name"
-    3. char[1] strand;              "+ or - for strand"
-    4. uint txStart;                "Transcription start position"
-    5. uint txEnd;                  "Transcription end position"
-    6. uint cdsStart;               "Coding region start"
-    7. uint cdsEnd;                 "Coding region end"
-    8. uint exonCount;              "Number of exons"
-    9. uint[exonCount] exonStarts;  "Exon start positions"
-    10. uint[exonCount] exonEnds;   "Exon end positions"
-    11. uint id;                    "Unique identifier"
-    12. string name2;               "Alternate name (e.g. gene_id from GTF)"
-    13. string cdsStartStat;        "enum('none','unk','incmpl','cmpl')"
-    14. string cdsEndStat;          "enum('none','unk','incmpl','cmpl')"
-    15. lstring exonFrames;         "Exon frame offsets {0,1,2}"
+    0. string name;                 "Name of gene (usually transcript_id from GTF)"
+    1. string chrom;                "Chromosome name"
+    2. char[1] strand;              "+ or - for strand"
+    3. uint txStart;                "Transcription start position"
+    4. uint txEnd;                  "Transcription end position"
+    5. uint cdsStart;               "Coding region start"
+    6. uint cdsEnd;                 "Coding region end"
+    7. uint exonCount;              "Number of exons"
+    8. uint[exonCount] exonStarts;  "Exon start positions"
+    9. uint[exonCount] exonEnds;    "Exon end positions"
+    10. uint id;                    "Unique identifier"
+    11. string name2;               "Alternate name (e.g. gene_id from GTF)"
     """
     for line in infile:
         # Skip comments.
         if line.startswith('#'):
             continue
         row = line.rstrip('\n').split('\t')
-        if len(row) != 16:
-            raise ValueError(
-                'File has incorrect number of columns '
-                'in at least one line.')
+        if skip_first_column:
+            row = row[1:]
 
         # Skip trailing ,
-        exon_starts = list(map(int, row[9].split(',')[:-1]))
-        exon_ends = list(map(int, row[10].split(',')[:-1]))
-        exon_frames = list(map(int, row[15].split(',')[:-1]))
+        exon_starts = list(map(int, row[8].split(',')[:-1]))
+        exon_ends = list(map(int, row[9].split(',')[:-1]))
         exons = list(zip(exon_starts, exon_ends))
 
         yield {
-            'chrom': row[2],
-            'start': int(row[4]),
-            'end': int(row[5]),
-            'id': row[1],
-            'strand': row[3],
-            'cds_start': int(row[6]),
-            'cds_end': int(row[7]),
-            'gene_name': row[12],
+            'chrom': row[1],
+            'start': int(row[3]),
+            'end': int(row[4]),
+            'id': row[0],
+            'strand': row[2],
+            'cds_start': int(row[5]),
+            'cds_end': int(row[6]),
+            'gene_name': row[11],
             'exons': exons,
-            'exon_frames': exon_frames
         }
 
 
@@ -89,25 +84,72 @@ def make_transcript(transcript_json):
             transcript_json['chrom'],
             transcript_json['cds_start'],
             transcript_json['cds_end'],
-            transcript_json['strand'] == '+'))
+            transcript_json['strand'] == '+'),
+        start_codon_transcript_pos=transcript_json.get("start_codon_transcript_pos"),
+        stop_codon_transcript_pos=transcript_json.get("stop_codon_transcript_pos"),
+    )
 
     exons = transcript_json['exons']
+    exons.sort(key=operator.itemgetter(0))
+    cdna_match = transcript_json.get('cdna_match', [])
+    cdna_match.sort(key=operator.itemgetter(0))
+
     if not transcript.tx_position.is_forward_strand:
-        exons = reversed(exons)
-
-    for exon_number, (exon_start, exon_end) in enumerate(exons, 1):
-        transcript.exons.append(
-            Exon(transcript=transcript,
-                 tx_position=Position(
-                     transcript_json['chrom'],
-                     exon_start,
-                     exon_end,
-                     transcript_json['strand'] == '+'),
-                 exon_number=exon_number))
+        exons.reverse()
+        cdna_match.reverse()
+
+    # We don't use exons, but run everything through cDNA match so there's just 1 path
+    # exons are treated as a perfect cDNA match
+    if not cdna_match:
+        cdna_match = json_perfect_exons_to_cdna_match(exons)
+
+    for number, (exon_start, exon_end, cdna_start, cdna_end, gap) in enumerate(cdna_match, 1):
+        transcript.cdna_match.append(CDNA_Match(transcript=transcript,
+                                                tx_position=Position(
+                                                    transcript_json['chrom'],
+                                                    exon_start,
+                                                    exon_end,
+                                                    transcript_json['strand'] == '+'),
+                                                cdna_start=cdna_start,
+                                                cdna_end=cdna_end,
+                                                gap=gap,
+                                                number=number))
 
     return transcript
 
 
+def json_perfect_exons_to_cdna_match(ordered_exons, single=False):
+    """ Perfectly matched exons are basically a no-gap case of cDNA match
+        single - use a single cDNA match (deletions for introns) - this is currently broken do not use
+    """
+    cdna_match = []
+    if single:
+        ordered_exons = list(ordered_exons)
+        start = ordered_exons[0][0]
+        end = ordered_exons[-1][1]
+        last_exon_end = None
+        gap_list = []
+        cdna_length = 0
+        for (exon_start, exon_end) in ordered_exons:
+            # end up looking like "M D M D (M=exon, D=intron length)"
+            if last_exon_end:
+                intron_length = abs(exon_start - last_exon_end)
+                gap_list.append("D%d" % intron_length)
+            exon_length = exon_end - exon_start
+            cdna_length += exon_length
+            gap_list.append("M%d" % exon_length)
+            last_exon_end = exon_end
+        cdna_match = [[start, end, 1, cdna_length, " ".join(gap_list)]]
+    else:
+        cdna_start = 1
+        for (exon_start, exon_end) in ordered_exons:
+            exon_length = exon_end - exon_start
+            cdna_end = cdna_start + exon_length - 1
+            cdna_match.append([exon_start, exon_end, cdna_start, cdna_end, None])
+            cdna_start = cdna_end + 1
+    return cdna_match
+
+
 def read_transcripts(refgene_file):
     """
     Read all transcripts in a RefGene file.
diff --git a/requirements-dev.txt b/requirements-dev.txt
index 1018450..5b2667d 100644
--- a/requirements-dev.txt
+++ b/requirements-dev.txt
@@ -1,3 +1,4 @@
 flake8==2.2.5
+lazy==1.4
 nose==1.3.7
 pyfaidx==0.5.0.1
diff --git a/setup.py b/setup.py
index 3f94c25..34bcc44 100644
--- a/setup.py
+++ b/setup.py
@@ -20,17 +20,18 @@ def main():
 
     setup(
         name='pyhgvs',
-        version='0.11.1',
+        version='0.12.5',
         description='HGVS name parsing and formatting',
         long_description=description,
         author='Matt Rasmussen',
         author_email='rasmus@counsyl.com',
-        packages=[str('pyhgvs'), str('pyhgvs.tests')],
+        packages=[str('pyhgvs'), str('pyhgvs.models'), str('pyhgvs.tests')],
         include_package_data=True,
         scripts=[],
         install_requires=['pip>=1.2'],
         tests_require=[
           'flake8==2.2.5',
+          'lazy==1.4',
           'nose==1.3.7',
           'pyfaidx==0.5.0.1',
         ],