[galaxy-commits] galaxy-dist commit 481da3760997: Improvements to history import/export archiving: (a) added security checks when opening and/or copying files from an imported archive and (b) implemented options to gzip, include/exclude hidden datasets, and include/exclude deleted datasets when exporting to an archive.

commits-noreply at bitbucket.org commits-noreply at bitbucket.org
Fri Jul 16 10:06:28 EDT 2010


# HG changeset patch -- Bitbucket.org
# Project galaxy-dist
# URL http://bitbucket.org/galaxy/galaxy-dist/overview
# User jeremy goecks <jeremy.goecks at emory.edu>
# Date 1278421808 14400
# Node ID 481da37609972064346f7a91853686533dcea84c
# Parent  40bfc0def292da5d1c83ea3c55ee9915b7fdbf74
Improvements to history import/export archiving: (a) added security checks when opening and/or copying files from an imported archive and (b) implemented options to gzip, include/exclude hidden datasets, and include/exclude deleted datasets when exporting to an archive.

--- a/lib/galaxy/web/controllers/history.py
+++ b/lib/galaxy/web/controllers/history.py
@@ -10,7 +10,7 @@ from galaxy.tools.parameters.basic impor
 from galaxy.tools.actions import upload_common
 from galaxy.tags.tag_handler import GalaxyTagHandler
 from sqlalchemy.sql.expression import ClauseElement
-import webhelpers, logging, operator, tempfile, subprocess, shutil, tarfile
+import webhelpers, logging, operator, os, tempfile, subprocess, shutil, tarfile
 from datetime import datetime
 from cgi import escape
 
@@ -443,28 +443,31 @@ class HistoryController( BaseController,
             self.add_item_annotation( trans, history, new_annotation )
             trans.sa_session.flush()
             return new_annotation
-            
-    def import_archive( self, trans, archived_history=None ):
+
+    def import_archive( self, trans, archived_history=None, gzip=True ):
         """ Import a history. """
         
+        def file_in_dir( file_path, a_dir ):
+            """ Returns true if file is in directory. """
+            abs_file_path = os.path.abspath( file_path )
+            return os.path.split( abs_file_path )[0] == a_dir
+        
         if archived_history is not None:
             try:         
                 history_archive_file = tarfile.open( archived_history.file.name )
-            
-                # Security check: make sure that members are relative, not absolute.
-                for tarinfo in history_archive_file.getmembers():
-                    if tarinfo.name.startswith("/") or tarinfo.name.find("..") != -1:
-                        return trans.show_error_message( 'Error importing history archive: archive file is invalid.' )
-            
+                    
                 # Unpack archive in temporary directory.
                 temp_output_dir = tempfile.mkdtemp()
                 history_archive_file.extractall( path=temp_output_dir )
                 history_archive_file.close()
-        
+    
                 #
                 # Create history.
                 #
-                history_attr_in = open( '%s/%s' % ( temp_output_dir, 'history_attrs.txt'), 'rb' )
+                history_attr_file_name = '%s/%s' % ( temp_output_dir, 'history_attrs.txt')
+                if not file_in_dir( history_attr_file_name, temp_output_dir ):
+                    raise Exception( "Invalid location for history attributes file: %s" % history_attr_file_name )
+                history_attr_in = open( history_attr_file_name, 'rb' )
                 history_attr_str = ''
                 buffsize = 1048576
                 try:
@@ -476,32 +479,35 @@ class HistoryController( BaseController,
                     pass
                 history_attr_in.close()
                 history_attrs = from_json_string( history_attr_str )
-        
+    
                 # Create history.
                 new_history = model.History( name='imported from archive: %s' % history_attrs['name'].encode( 'utf-8' ), user=trans.user )
                 trans.sa_session.add( new_history )
-            
+        
                 new_history.hid_counter = history_attrs['hid_counter']
                 new_history.genome_build = history_attrs['genome_build']
                 trans.sa_session.flush()
-            
+        
                 # Builds a tag string for a tag, value pair.
                 def get_tag_str( tag, value ):
                     if not value:
                         return tag
                     else:
                         return tag + ":" + value
-                            
+                        
                 # Add annotation, tags.
                 if trans.user:
                     self.add_item_annotation( trans, new_history, history_attrs[ 'annotation' ] )
                     for tag, value in history_attrs[ 'tags' ].items():
                         trans.app.tag_handler.apply_item_tags( trans, trans.user, new_history, get_tag_str( tag, value ) )
-        
+    
                 #
                 # Create datasets.
                 #
-                datasets_attr_in = open( '%s/%s' % ( temp_output_dir, 'datasets_attrs.txt'), 'rb' )
+                datasets_attrs_file_name = '%s/%s' % ( temp_output_dir, 'datasets_attrs.txt')
+                if not file_in_dir( datasets_attrs_file_name, temp_output_dir ):
+                    raise Exception( "Invalid location for dataset attributes file: %s" % datasets_attrs_file_name )
+                datasets_attr_in = open( datasets_attrs_file_name, 'rb' )
                 datasets_attr_str = ''
                 buffsize = 1048576
                 try:
@@ -513,11 +519,11 @@ class HistoryController( BaseController,
                     pass
                 datasets_attr_in.close()
                 datasets_attrs = from_json_string( datasets_attr_str )
-        
+    
                 # Create datasets.
                 for dataset_attrs in datasets_attrs:
                     metadata = dataset_attrs['metadata']
-            
+        
                     # Create dataset and HDA.
                     hda = model.HistoryDatasetAssociation( name = dataset_attrs['name'].encode( 'utf-8' ),
                                                            extension = dataset_attrs['extension'],
@@ -539,24 +545,29 @@ class HistoryController( BaseController,
                     permissions = trans.app.security_agent.history_get_default_permissions( new_history )
                     trans.app.security_agent.set_all_dataset_permissions( hda.dataset, permissions )
                     trans.sa_session.flush()
-            
-                    # Copy dataset data.
-                    temp_dataset_name = '%s/datasets/%s' % ( temp_output_dir, dataset_attrs['file_name'] )
-                    shutil.copyfile( temp_dataset_name, hda.file_name )
-            
+        
+                    # Do security check and copy dataset data.
+                    temp_dataset_file_name = '%s/datasets/%s' % ( temp_output_dir, dataset_attrs['file_name'] )
+                    if not file_in_dir( temp_dataset_file_name, temp_output_dir + "/datasets" ):
+                        raise Exception( "Invalid dataset path: %s" % temp_dataset_file_name )
+                    shutil.move( temp_dataset_file_name, hda.file_name )
+        
                     # Set tags, annotations.
                     if trans.user:
                         self.add_item_annotation( trans, hda, dataset_attrs[ 'annotation' ] )
                         for tag, value in dataset_attrs[ 'tags' ].items():
                             trans.app.tag_handler.apply_item_tags( trans, trans.user, hda, get_tag_str( tag, value ) )
                             trans.sa_session.flush()
-            
+        
                 #
                 # Create jobs.
                 #
-            
+        
                 # Read jobs attributes.
-                jobs_attr_in = open( '%s/%s' % ( temp_output_dir, 'jobs_attrs.txt'), 'rb' )
+                jobs_attr_file_name = '%s/%s' % ( temp_output_dir, 'jobs_attrs.txt')
+                if not file_in_dir( jobs_attr_file_name, temp_output_dir ):
+                    raise Exception( "Invalid location for jobs' attributes file: %s" % jobs_attr_file_name )
+                jobs_attr_in = open( jobs_attr_file_name, 'rb' )
                 jobs_attr_str = ''
                 buffsize = 1048576
                 try:
@@ -567,7 +578,7 @@ class HistoryController( BaseController,
                 except OverflowError:
                     pass
                 jobs_attr_in.close()
-            
+        
                 # Decode jobs attributes.
                 def as_hda( obj_dct ):
                     """ Hook to 'decode' an HDA; method uses history and HID to get the HDA represented by 
@@ -577,7 +588,7 @@ class HistoryController( BaseController,
                                             .filter_by( history=new_history, hid=obj_dct['hid'] ).first()
                     return obj_dct
                 jobs_attrs = from_json_string( jobs_attr_str, object_hook=as_hda )
-            
+        
                 # Create each job.
                 for job_attrs in jobs_attrs:
                     imported_job = model.Job()
@@ -590,7 +601,7 @@ class HistoryController( BaseController,
                     imported_job.imported = True
                     trans.sa_session.add( imported_job )
                     trans.sa_session.flush()
-                
+            
                     class HistoryDatasetAssociationIDEncoder( simplejson.JSONEncoder ):
                         """ Custom JSONEncoder for a HistoryDatasetAssociation that encodes an HDA as its ID. """
                         def default( self, obj ):
@@ -598,7 +609,7 @@ class HistoryController( BaseController,
                             if isinstance( obj, model.HistoryDatasetAssociation ):
                                 return obj.id
                             return simplejson.JSONEncoder.default( self, obj )
-                                    
+                                
                     # Set parameters. May be useful to look at metadata.py for creating parameters.
                     # TODO: there may be a better way to set parameters, e.g.:
                     #   for name, value in tool.params_to_strings( incoming, trans.app ).iteritems():
@@ -614,9 +625,9 @@ class HistoryController( BaseController,
                             value = input_hda.id
                         #print "added parameter %s-->%s to job %i" % ( name, value, imported_job.id )
                         imported_job.add_parameter( name, to_json_string( value, cls=HistoryDatasetAssociationIDEncoder ) )
-                    
+                
                     # TODO: Connect jobs to input datasets.
-                
+            
                     # Connect jobs to output datasets.
                     for output_hid in job_attrs[ 'output_datasets' ]:
                         #print "%s job has output dataset %i" % (imported_job.id, output_hid)
@@ -625,11 +636,11 @@ class HistoryController( BaseController,
                         if output_hda:
                             imported_job.add_output_dataset( output_hda.name, output_hda )
                     trans.sa_session.flush()
-                                
+                            
                 # Cleanup.
                 if os.path.exists( temp_output_dir ):
                     shutil.rmtree( temp_output_dir )
-        
+    
                 return trans.show_ok_message( message="History '%s' has been imported. " % history_attrs['name'] )
             except Exception, e:
                 return trans.show_error_message( 'Error importing history archive. ' + str( e ) )  
@@ -638,10 +649,18 @@ class HistoryController( BaseController,
             web.FormBuilder( web.url_for(), "Import a History from an Archive", submit_text="Submit" )
                 .add_input( "file", "Archived History File", "archived_history", value=None, error=None ) 
                             )
-            
-    def export_archive( self, trans, id=None ):
+      
+    def export_archive( self, trans, id=None, gzip=True, include_hidden=False, include_deleted=False ):
         """ Export a history. """
         
+        # Convert options to booleans.
+        if isinstance( gzip, basestring ):
+            gzip = ( gzip in [ 'True', 'true', 'T', 't' ] )            
+        if isinstance( include_hidden, basestring ):
+            include_hidden = ( include_hidden in [ 'True', 'true', 'T', 't' ] )
+        if isinstance( include_deleted, basestring ):
+            include_deleted = ( include_deleted in [ 'True', 'true', 'T', 't' ] )    
+        
         #
         # Helper methods/classes.
         #
@@ -702,7 +721,9 @@ class HistoryController( BaseController,
             return trans.show_error_message( "This history does not exist or you cannot export this history." )
             
         history_export_dir_name = "./database/export"
-        archive_file_name = '%s/%s.tar.gz' % ( history_export_dir_name, trans.security.encode_id( history.id ) )
+        archive_file_name = '%s/%s.tar' % ( history_export_dir_name, trans.security.encode_id( history.id ) )
+        if gzip:
+            archive_file_name += '.gz'
         
         #
         # Do export.
@@ -717,7 +738,7 @@ class HistoryController( BaseController,
             try:    
                 # Use temporary directory for temp output files.
                 temp_output_dir = tempfile.mkdtemp()
-            
+        
                 #
                 # Write history attributes to file.
                 #
@@ -734,27 +755,33 @@ class HistoryController( BaseController,
                 history_attrs_out = open( history_attrs_file_name, 'w' )
                 history_attrs_out.write( to_json_string( history_attrs ) )
                 history_attrs_out.close()
-                                        
+                                    
                 #
                 # Write datasets' attributes to file.
                 #
                 datasets = self.get_history_datasets( trans, history )
+                included_datasets = []
                 datasets_attrs = []
                 for dataset in datasets:
+                    if not dataset.visible and not include_hidden:
+                        continue
+                    if dataset.deleted and not include_deleted:
+                        continue
                     dataset.annotation = self.get_item_annotation_str( trans, history.user, dataset )
                     datasets_attrs.append( dataset )
+                    included_datasets.append( dataset )
                 datasets_attrs_file_name = tempfile.NamedTemporaryFile( dir=temp_output_dir ).name
                 datasets_attrs_out = open( datasets_attrs_file_name, 'w' )
                 datasets_attrs_out.write( to_json_string( datasets_attrs, cls=HistoryDatasetAssociationEncoder ) )
                 datasets_attrs_out.close()
-            
+        
                 #
                 # Write jobs attributes file.
                 #
-            
-                # Get all jobs associated with HDAs.
+        
+                # Get all jobs associated with included HDAs.
                 jobs_dict = {}
-                for hda in datasets:
+                for hda in included_datasets:
                     # Get the associated job, if any. If this hda was copied from another,
                     # we need to find the job that created the origial hda
                     job_hda = hda
@@ -763,7 +790,7 @@ class HistoryController( BaseController,
                     if not job_hda.creating_job_associations:
                         # No viable HDA found.
                         continue
-                
+            
                     # Get the job object.
                     job = None
                     for assoc in job_hda.creating_job_associations:
@@ -772,9 +799,9 @@ class HistoryController( BaseController,
                     if not job:
                         # No viable job.
                         continue
-                    
+                
                     jobs_dict[ job.id ] = job
-                    
+                
                 # Get jobs' attributes.
                 jobs_attrs = []
                 for id, job in jobs_dict.items():
@@ -782,7 +809,7 @@ class HistoryController( BaseController,
                     job_attrs[ 'tool_id' ] = job.tool_id
                     job_attrs[ 'tool_version' ] = job.tool_version
                     job_attrs[ 'state' ] = job.state
-                                        
+                                    
                     # Get the job's parameters
                     try:
                         params_objects = job.get_param_values( trans.app )
@@ -794,52 +821,57 @@ class HistoryController( BaseController,
                     for name, value in params_objects.items():
                         params_dict[ name ] = value
                     job_attrs[ 'params' ] = params_dict
-                
+            
                     # Get input, output datasets.
                     input_datasets = [ assoc.dataset.hid for assoc in job.input_datasets ]
                     job_attrs[ 'input_datasets' ] = input_datasets
                     output_datasets = [ assoc.dataset.hid for assoc in job.output_datasets ]
                     job_attrs[ 'output_datasets' ] = output_datasets
-                
+            
                     jobs_attrs.append( job_attrs )
-                
+            
                 jobs_attrs_file_name = tempfile.NamedTemporaryFile( dir=temp_output_dir ).name
                 jobs_attrs_out = open( jobs_attrs_file_name, 'w' )
                 jobs_attrs_out.write( to_json_string( jobs_attrs, cls=HistoryDatasetAssociationEncoder ) )
                 jobs_attrs_out.close()
-    
+
                 #
                 # Write archive and include: (a) history attributes file; (b) datasets attributes file; 
                 # (c) jobs attributes file; and (d) datasets files.
                 #
-                history_archive_name = '%s/%s.tar.gz' % ( history_export_dir_name, trans.security.encode_id( history.id ) )
-                history_archive = tarfile.open( history_archive_name, "w:gz" )
+                tarfile_mode = "w"
+                if gzip:
+                    tarfile_mode += ":gz"
+                history_archive = tarfile.open( archive_file_name, tarfile_mode )
                 history_archive.add( history_attrs_file_name, arcname="history_attrs.txt" )
                 history_archive.add( datasets_attrs_file_name, arcname="datasets_attrs.txt" )
                 history_archive.add( jobs_attrs_file_name, arcname="jobs_attrs.txt" )
-                for i, dataset in enumerate( datasets ) :
+                for i, dataset in enumerate( included_datasets ):
                     history_archive.add( dataset.file_name, arcname="datasets/%s" % dataset.file_name.split('/')[-1] )
                 history_archive.close()
-    
+
                 # Remove temp directory.
                 if os.path.exists( temp_output_dir ):
-                    shutil.rmtree( temp_output_dir )
-            
+                    shutil.rmtree( temp_output_dir )        
             except Exception, e:
                 return trans.show_error_message( 'Error creating history archive. ' + str( e ) )
         
         #
         # Stream archive.
         #
-        if os.path.exists( history_archive_name ):
+        if os.path.exists( archive_file_name ):
             valid_chars = '.,^_-()[]0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
             hname = history.name
             hname = ''.join(c in valid_chars and c or '_' for c in hname)[0:150]
-            trans.response.headers["Content-Disposition"] = "attachment; filename=Galaxy-History-%s.tar.gz" % ( hname )
-            trans.response.set_content_type( 'application/x-gzip' )
-            return open( history_archive_name )
+            trans.response.headers["Content-Disposition"] = "attachment; filename=Galaxy-History-%s.tar" % ( hname )
+            if gzip:
+                trans.response.headers["Content-Disposition"] += ".gz"
+                trans.response.set_content_type( 'application/x-gzip' )
+            else:
+                trans.response.set_content_type( 'application/x-tar' )
+            return open( archive_file_name )
         else:
-            return
+            return trans.show_error_message( 'Archive file does not exist.' )
     
     @web.expose
     @web.json


More information about the galaxy-commits mailing list