[galaxy-commits] commit/galaxy-central: jgoecks: Trackster: improve method for fetching additional data and simplify data fetching.

Bitbucket commits-noreply at bitbucket.org
Tue Sep 13 16:55:26 EDT 2011


1 new changeset in galaxy-central:

http://bitbucket.org/galaxy/galaxy-central/changeset/c6452f6558ee/
changeset:   c6452f6558ee
user:        jgoecks
date:        2011-09-13 22:55:17
summary:     Trackster: improve method for fetching additional data and simplify data fetching.
affected #:  3 files (1.4 KB)

--- a/lib/galaxy/visualization/tracks/data_providers.py	Mon Sep 12 14:51:26 2011 -0400
+++ b/lib/galaxy/visualization/tracks/data_providers.py	Tue Sep 13 16:55:17 2011 -0400
@@ -3,7 +3,7 @@
 """
 
 import sys
-from math import floor, ceil, log, pow
+from math import ceil, log
 import pkg_resources
 pkg_resources.require( "bx-python" )
 if sys.version_info[:2] == (2, 4):
@@ -13,14 +13,12 @@
 from galaxy.datatypes.util.gff_util import *
 from galaxy.util.json import from_json_string
 from bx.interval_index_file import Indexes
-from bx.arrays.array_tree import FileArrayTreeDict
 from bx.bbi.bigwig_file import BigWigFile
 from galaxy.util.lrucache import LRUCache
 from galaxy.visualization.tracks.summary import *
 import galaxy_utils.sequence.vcf
 from galaxy.datatypes.tabular import Vcf
 from galaxy.datatypes.interval import Bed, Gff, Gtf
-from galaxy.datatypes.util.gff_util import parse_gff_attributes
 
 from pysam import csamtools, ctabix
 
@@ -32,7 +30,20 @@
         return None
     else:
         return float(n)
-
+        
+def get_bounds( reads, start_pos_index, end_pos_index ):
+    """
+    Returns the minimum and maximum position for a set of reads.
+    """
+    max_low = sys.maxint
+    max_high = -sys.maxint
+    for read in reads:
+        if read[ start_pos_index ] < max_low:
+            max_low = read[ start_pos_index ]
+        if read[ end_pos_index ] > max_high:
+            max_high = read[ end_pos_index ]
+    return max_low, max_high
+        
 class TracksDataProvider( object ):
     """ Base class for tracks data providers. """
     
@@ -75,8 +86,11 @@
     def get_data( self, chrom, start, end, start_val=0, max_vals=None, **kwargs ):
         """ 
         Returns data in region defined by chrom, start, and end. start_val and
-        max_vals are used to denote the data to return: start_val is the first value to 
+        max_vals are used to denote the data to return: start_val is the first element to 
         return and max_vals indicates the number of values to return.
+        
+        Return value must be a dictionary with the following attributes:
+            dataset_type, data
         """
         # Override.
         pass
@@ -218,18 +232,23 @@
     
     def get_data( self, chrom, start, end, start_val=0, max_vals=sys.maxint, **kwargs ):
         """
-        Fetch reads in the region.
+        Fetch reads in the region and additional metadata.
         
-        Each read is a list with the format 
-            [<guid>, <start>, <end>, <name>, <read_1>, <read_2>] 
-        where <read_1> has the format
-            [<start>, <end>, <cigar>, ?<read_seq>?]
-        and <read_2> has the format
-            [<start>, <end>, <cigar>, ?<read_seq>?]
-        For single-end reads, read has format:
-            [<guid>, <start>, <end>, <name>, cigar, seq] 
-        NOTE: read end and sequence data are not valid for reads outside of
-        requested region and should not be used.
+        Returns a dict with the following attributes:
+            data - a list of reads with the format 
+                    [<guid>, <start>, <end>, <name>, <read_1>, <read_2>] 
+                where <read_1> has the format
+                    [<start>, <end>, <cigar>, ?<read_seq>?]
+                and <read_2> has the format
+                    [<start>, <end>, <cigar>, ?<read_seq>?]
+                For single-end reads, read has format:
+                    [<guid>, <start>, <end>, <name>, cigar, seq] 
+                NOTE: read end and sequence data are not valid for reads outside of
+                requested region and should not be used.
+            
+            max_low - lowest coordinate for the returned reads
+            max_high - highest coordinate for the returned reads
+            message - error/informative message
         """
         start, end = int(start), int(end)
         orig_data_filename = self.original_dataset.file_name
@@ -304,9 +323,13 @@
                 r2 = [ read['mate_start'], read['mate_start'] ]
 
             results.append( [ "%i_%s" % ( read_start, qname ), read_start, read_end, qname, r1, r2 ] )
-
+            
+        # Clean up.
         bamfile.close()
-        return { 'data': results, 'message': message }
+        
+        max_low, max_high = get_bounds( results, 1, 2 )
+                
+        return { 'data': results, 'message': message, 'max_low': max_low, 'max_high': max_high }
 
 class BBIDataProvider( TracksDataProvider ):
     """
@@ -334,9 +357,10 @@
                 return None
             
             all_dat = all_dat[0] # only 1 summary
-            return { 'max': float( all_dat['max'] ), \
-                     'min': float( all_dat['min'] ), \
-                     'total_frequency': float( all_dat['coverage'] ) }
+            return { 'data' : { 'max': float( all_dat['max'] ), \
+                                'min': float( all_dat['min'] ), \
+                                'total_frequency': float( all_dat['coverage'] ) } \
+                    }
                      
         start = int(start)
         end = int(end)
@@ -361,7 +385,7 @@
                 result.append( (pos, float_nan(dat_dict['mean']) ) )
                 pos += step_size
             
-        return result
+        return { 'data': result }
 
 class BigBedDataProvider( BBIDataProvider ):
     def _get_dataset( self ):
@@ -654,7 +678,7 @@
                         float( feature[5] )]
             rval.append(payload)
 
-        return { 'data_type' : 'vcf', 'data': rval, 'message': message }
+        return { 'data': rval, 'message': message }
 
 class GFFDataProvider( TracksDataProvider ):
     """


--- a/lib/galaxy/web/controllers/tracks.py	Mon Sep 12 14:51:26 2011 -0400
+++ b/lib/galaxy/web/controllers/tracks.py	Tue Sep 13 16:55:17 2011 -0400
@@ -530,15 +530,9 @@
             data_provider = data_provider_class( converted_dataset=converted_dataset, original_dataset=dataset, dependencies=deps )
         
         # Get and return data from data_provider.
-        data = data_provider.get_data( chrom, low, high, int(start_val), int(max_vals), **kwargs )
-        message = None
-        if isinstance(data, dict) and 'message' in data:
-            message = data['message']
-            tracks_dataset_type = data.get( 'data_type', tracks_dataset_type )
-            track_data = data['data']
-        else:
-            track_data = data
-        return { 'dataset_type': tracks_dataset_type, 'extra_info': extra_info, 'data': track_data, 'message': message }
+        result = data_provider.get_data( chrom, low, high, int(start_val), int(max_vals), **kwargs )
+        result.update( { 'dataset_type': tracks_dataset_type, 'extra_info': extra_info } )
+        return result
         
     @web.json
     def save( self, trans, **kwargs ):


--- a/static/scripts/trackster.js	Mon Sep 12 14:51:26 2011 -0400
+++ b/static/scripts/trackster.js	Tue Sep 13 16:55:17 2011 -0400
@@ -443,9 +443,9 @@
             $.extend(extra_params, {start_val: cur_data.data.length + 1});
         }
         else if (req_type === this.BROAD_DATA_REQ) {
-            // Set query low to be past the last feature returned so that an area of extreme feature depth
-            // is bypassed.
-            query_low = cur_data.data[cur_data.data.length - 1][2] + 1;
+            // To get past an area of extreme feature depth, set query low to be after either
+            // (a) the maximum high or HACK/FIXME (b) the end of the last feature returned.
+            query_low = (cur_data.max_high ? cur_data.max_high : cur_data.data[cur_data.data.length - 1][2]) + 1;
         }
         
         //
@@ -463,6 +463,9 @@
             // Update data and message.
             if (result.data) {
                 result.data = cur_data.data.concat(result.data);
+                if (result.max_low) {
+                    result.max_low = cur_data.max_low;
+                }
                 if (result.message) {
                     // HACK: replace number in message with current data length. Works but is ugly.
                     result.message = result.message.replace(/[0-9]+/, result.data.length);

Repository URL: https://bitbucket.org/galaxy/galaxy-central/

--

This is a commit notification from bitbucket.org. You are receiving
this because the commit notification service is enabled for this
repository and this address is configured as its recipient.


More information about the galaxy-commits mailing list