/************************************************************
 * Check the contents of a zip file
 * Load data directly without unzipping
 * A SAS macro to read a csv file directly from a zip file
 *
 * Peng Zeng (Auburn University)
 * 02-09-2018
 ************************************************************/

/************************************************************
 * some SAS codes are adapted from
 * https://blogs.sas.com/content/sasdummy/2015/05/11/using-filename-zip-to-unzip-and-read-data-files-in-sas/
 * https://support.sas.com/resources/papers/proceedings14/SAS264-2014.pdf
 ************************************************************/

/* Point a fileref at the archive through the ZIP access method. */
filename inzip ZIP "C:\myzip.zip";

/* Read the "members" (files) from the ZIP file.                  */
/* The archive is treated as a directory: DOPEN/DNUM/DREAD list   */
/* its entries, one output row per entry.                         */
data contents (keep = memname isFolder);
  length memname $200 isFolder 8;

  fid = dopen("inzip");
  if fid = 0 then do;
    /* Report the failure instead of silently producing an empty table. */
    put 'ERROR: unable to open the ZIP archive assigned to fileref INZIP.';
    stop;
  end;

  memcount = dnum(fid);
  do i = 1 to memcount;
    memname = dread(fid, i);
    /* A trailing / in the entry name marks a folder. */
    isFolder = (first(reverse(trim(memname))) = '/');
    output;
  end;

  rc = dclose(fid);
run;

/* Create a report of the ZIP contents. */
title "Files in the ZIP file";
proc print data = contents noobs N;
run;
/* Clear the title so that later reports are not labelled as the ZIP listing. */
title;

/* Copy the file to the WORK directory and then load it. */
filename csv "%sysfunc(getoption(work))/tmp.csv" ;

/* Byte-for-byte copy of one archive member to the temporary file.       */
/* The member is read in fixed 256-byte chunks; nbytes receives the      */
/* number of bytes actually read, so a short final chunk is written at   */
/* its true length.                                                      */
data _null_;
  /* using member syntax here */
  infile inzip(subdir/mycsv.csv)
         lrecl = 256 recfm = F length = nbytes eof = eof unbuf;
  file csv lrecl = 256 recfm = N;
  input;
  put _infile_ $varying256. nbytes;
  return;
eof:
  stop;
run;

proc import datafile = csv out = mydata dbms = csv replace;
run;

proc print data = mydata;
run;

/* the following method is available since SAS 9.4 */
/* Note: it does not support proc import in this case */
filename fromzip zip 'C:\myzip.zip' member = 'subdir/mycsv.csv';

/* Read the member in place: skip the header row and parse comma-separated fields. */
data mydata2;
  infile fromzip dsd delimiter = ',' firstobs = 2;
  input var1 $ var2 $ var3;
run;

proc print data = mydata2;
run;

/************************************************************
 * A SAS Macro
 * read the first non-directory file as csv using proc import
 *
 * Parameters
 *   myzip   : path of the ZIP archive (unquoted)
 *   outdata : name of the output data set created by proc import
 *
 * Side effects
 *   - assigns filerefs INZIP (the archive) and ZIPCSV (a temporary
 *     copy named _tmp_.txt in the WORK directory)
 *   - creates the data set ZIP_CONTENTS listing the archive entries
 *   - replaces the data set named by outdata
 *
 * If the archive cannot be opened, or it contains no file entry,
 * an ERROR message is written to the log and nothing is imported.
 *
 * NOTE(review): the member name is passed unquoted to the
 * fileref(member) syntax, as in the original code; member names
 * containing blanks are presumably not handled - confirm.
 ************************************************************/
%macro read_zip(myzip, outdata);
  /* Keep the member name private to this macro and start from a known empty value. */
  %local memfile;
  %let memfile = ;

  filename inzip ZIP "&myzip";
  filename zipcsv "%sysfunc(getoption(work))/_tmp_.txt" ;

  /* Read the "members" (files) from the ZIP file */
  data zip_contents (keep = memname isFolder);
    length memname $200 isFolder 8;

    fid = dopen("inzip");
    if fid = 0 then do;
      /* Report the failure instead of silently producing an empty table. */
      put 'ERROR: unable to open the ZIP archive assigned to fileref INZIP.';
      stop;
    end;

    memcount = dnum(fid);
    do i = 1 to memcount;
      memname = dread(fid, i);
      /* check for trailing / in folder name */
      isFolder = (first(reverse(trim(memname))) = '/');
      output;
    end;

    rc = dclose(fid);
  run;

  /* Store the name of the first entry that is not a folder, then stop reading. */
  data _null_;
    set zip_contents;
    if isFolder = 0 then do;
      /* SYMPUTX strips blanks; the L argument writes to the local symbol table. */
      call symputx('memfile', memname, 'L');
      stop;
    end;
  run;

  /* Without a member name the copy step below cannot be written correctly. */
  %if %length(%superq(memfile)) = 0 %then %do;
    %put ERROR: read_zip found no file member to read in &myzip..;
    %return;
  %end;

  /* Byte-for-byte copy of the selected member to the temporary file. */
  data _null_;
    /* using member syntax here */
    infile inzip(&memfile)
           lrecl = 256 recfm = F length = nbytes eof = eof unbuf;
    file zipcsv lrecl = 256 recfm = N;
    input;
    put _infile_ $varying256. nbytes;
    return;
  eof:
    stop;
  run;

  /* Import the temporary copy as csv, scanning 500 rows to guess column types. */
  proc import datafile = zipcsv out = &outdata dbms = csv replace;
    guessingrows = 500;
  run;
%mend read_zip;

%read_zip(C:\myzip.zip, mydata);

proc print data = mydata;
run;

/************************************************************
 * THE END
 ************************************************************/