SAS編程-宏:檢查缺失或多余的文件

日常項目中,程序或Outputs一般會提前確定好,通過Excel Tracker或者文件管理系統(tǒng)進(jìn)行管理。各個公司一般也有相關(guān)工具進(jìn)行檢查,以確保Tracker中的程序或Outputs都在輸出文件夾中。

不過,本公司工具不會檢查文件夾中多余的程序或Outputs。于是,寫了個SAS宏程序用于實現(xiàn)這個功能。這個宏程序也可用于檢查其它所需文件,例如xml

宏程序匯總在文章末尾,若參考引用,需要結(jié)合各自項目路徑結(jié)構(gòu),以及Tracker信息,來更新程序。

宏程序的分為3個部分:

  1. 獲取系統(tǒng)或Tracker中的文件名稱
  2. 獲取文件夾中已有的文件名稱
  3. 通過比較以上兩類名稱,獲取缺失或多余的文件

第一部分,可能通過公司的管理工具獲取,也可以簡單導(dǎo)入EXCEL Tracker。

options validvarname = v7;
proc import datafile = "/xxx/xxx/xxx.xlsx"
   out = tfls dbms = xlsx replace;
run;

第二部分,可以通過Dopen系列函數(shù)獲取文件名稱,具體可以參考文章SAS編程:如何獲取某路徑下文件的名稱?,完整內(nèi)容可以整合成一個宏程序。

%macro get_file(dirpath =, outdt =, type = sas );

%if "&dirpath." ne "" %then %do;

%local dirpath_tmp slash;

%let slash = %substr(%sysfunc(compress(&dirpath., : _ , a d)), 1, 1);

*Remove trailing slash;
%if "%substr(&dirpath., %length(&dirpath.),1)"= "&slash." %then %let dirpath_tmp=%substr(&dirpath.,1, %length(&dirpath.) -1); 
%else %let dirpath_tmp = &dirpath.;

** Dopen--Get filepath;
data &outdt.;
  fileres = filename("dirpath", "&dirpath_tmp");
  dirid = dopen("dirpath");
  num = dnum(dirid);

  length direct filename filepath $200;

  if dirid >0 and num >0 then do;
    do i=1 to num;
      direct = "&dirpath_tmp.";
      filename = dread(dirid, i);
      filepath = catx("&slash.", direct, filename);

      %if %index(%upcase(&type), SAS) %then %do;
        if strip(scan(filename, 2, "."))="sas" then output;
      %end;

      %if %index(%upcase(&type), RTF) %then %do;
        if strip(scan(filename, 2, "."))="rtf" then output;
      %end;

      %if %index(%upcase(&type), XLSX) %then %do;
        if strip(scan(filename, 2, "."))= "xlsx" then output;
      %end;

      end;
  end;

  keep filename filepath;

  proc sort;
    by filename;
  run;
%end;
%mend get_file;

%get_file(
  dirpath = &path_ana./tables
  ,outdt = source_pgm_tables
  ,type = sas 
);

第三部分,可以通過SQL簡單的篩選呈現(xiàn)缺失或多余的文件。

%let out_type = tables;
%let type = T;

proc sql noprint;
  **in tracker not in folder;
  create table source_in_tracker_&out_type. as
    select output_number, base_output_name, program_name, programmer, tester
    from tfls
    where output_type = "&type." and program_name not in
    (
      select filename from source_pgm_&out_type.
    )
  ;

  **in folder not in tracker;
   create table source_in_folder_&out_type. as
      select *
      from source_pgm_&out_type.
      where filename not in
      (
        select program_name from tfls
      )
  ;
quit;

以上三部分,可以整合成一個完整的宏程序,用于檢查同一個分析文件夾下的TFLs程序或Output缺失或多余的情況。

以我當(dāng)前公司的數(shù)據(jù)結(jié)構(gòu)為例,匯總程序如下。若讀者想要引用,需結(jié)合項目文件結(jié)構(gòu)以及Tracker信息進(jìn)行更新。

***1. Get file names in tracker (Company internal tool);
%global path_ana pi_sas;

%let path_ana = /xxx/xxx;
%let pi_sas = pi_xxx_xxx.sas;

%include "&path_ana./docs/pi/&pi_sas.";

%pi_amg757_bla_iss_90d_2023(
  lib = work
  ,txtpath = &path_ana./docs/pi/
  ,outds = all
);


***2. Macro to get  file names in folder;
%macro get_file(dirpath =, outdt =, type = sas );

%if "&dirpath." ne "" %then %do;

%local dirpath_tmp slash;

%let slash = %substr(%sysfunc(compress(&dirpath., : _ , a d)), 1, 1);

*Remove trailing slash;
%if "%substr(&dirpath., %length(&dirpath.),1)"= "&slash." %then %let dirpath_tmp=%substr(&dirpath.,1, %length(&dirpath.) -1); 
%else %let dirpath_tmp = &dirpath.;

** Dopen--Get filepath;
data &outdt.;
  fileres = filename("dirpath", "&dirpath_tmp");
  dirid = dopen("dirpath");
  num = dnum(dirid);

  length direct filename filepath $200;

  if dirid >0 and num >0 then do;
    do i=1 to num;
      direct = "&dirpath_tmp.";
      filename = dread(dirid, i);
      filepath = catx("&slash.", direct, filename);

      %if %index(%upcase(&type), SAS) %then %do;
        if strip(scan(filename, 2, "."))="sas" then output;
      %end;

      %if %index(%upcase(&type), RTF) %then %do;
        if strip(scan(filename, 2, "."))="rtf" then output;
      %end;

      %if %index(%upcase(&type), XLSX) %then %do;
        if strip(scan(filename, 2, "."))= "xlsx" then output;
      %end;

      %if %index(%upcase(&type), XML) %then %do;
        if strip(scan(filename, 2, "."))= "xml" then output;
      %end;

      end;
  end;

  keep filename filepath;

  proc sort;
    by filename;
  run;
%end;
%mend get_file;


***3. Macro to check folder file;
%macro check_folder_file(out_type = tables);

%local type;
%if %upcase(&out_type) = TABLES %then %let type = T;
%else %if %upcase(&out_type) = LISTINGS %then %let type = L;
%else %if %upcase(&out_type) = FIGURES %then %let type = F;

**3.1 Source folder;
%get_file(
  dirpath = &path_ana./&out_type.
  ,outdt = source_pgm_&out_type.
);

proc sql noprint;
  **in tracker not in folder;
  create table source_in_tracker_&out_type. as
    select output_number, base_output_name, program_name, programmer, tester
    from tfls
    where output_type = "&type." and program_name not in
    (
      select filename from source_pgm_&out_type.
    )
  ;

  **in folder not in tracker;
   create table source_in_folder_&out_type. as
      select *
      from source_pgm_&out_type.
      where filename not in
      (
        select program_name from tfls
      )
  ;
quit;


**3.2 QC folder;
%get_file(
  dirpath = &path_ana./&out_type./validation
  ,outdt = qc_pgm_&out_type.
);

proc sql noprint;
  **in tracker not in folder;
  create table qc_in_tracker_&out_type. as
    select output_number, testing_program_name, tester
    from tfls
    where output_type = "&type." and testing_program_name not in
    (
      select filename from qc_pgm_&out_type.
    )
  ;

  **in folder not in tracker;
   create table qc_in_folder_&out_type. as
      select *
      from qc_pgm_&out_type.
      where filename not in
      (
        select testing_program_name from tfls
      )
  ;
quit;


**3.3 Output folder;
%get_file(
  dirpath = &path_ana./&out_type./output
  ,outdt = output_&out_type.
  ,type = rtf-xlsx
);

proc sql noprint;

  **in tracker not in folder;
  create table output_in_tracker_&out_type. as
    select *
    from tfls
    where output_type = "&type." and output_name not in
    (
      select filename from output_&out_type.
    )
  ;

  **in folder not in tracker;
   create table output_in_folder_&out_type. as
      select *
      from output_&out_type.
      where filename not in
      (
        select output_name from tfls
        where output_type = "&type."
      )
  ;

**3.4 xml template folder (internal use);
%get_file(
  dirpath = &path_ana./docs/templates
  ,outdt = tflgen_template
  ,type = xml
);

proc sql noprint;

  **in tracker not in folder;
  create table tflgen_template_in_tracker as
    select *
    from tfls
    where tflgen_allow_code_generation = "Yes" and strip(tflgen_template_name)||".xml" not in
    (
      select filename from tflgen_template
    )
  ;

  **in folder not in tracker;
   create table tflgen_template_in_folder  as
      select *
      from tflgen_template
      where filename not in
      (
        select strip(tflgen_template_name)||".xml" from tfls
        where tflgen_allow_code_generation = "Yes" 
      )
  ;

quit;


%mend check_folder_file;

%check_folder_file(out_type = tables);

%check_folder_file(out_type = listings);

%check_folder_file(out_type = figures);

感謝閱讀, 歡迎關(guān)注:SAS茶談!
若有疑問,歡迎評論交流!

梳理不易,轉(zhuǎn)載請注明出處 (by Jihai / SAS茶談)

最后編輯于
?著作權(quán)歸作者所有,轉(zhuǎn)載或內(nèi)容合作請聯(lián)系作者
【社區(qū)內(nèi)容提示】社區(qū)部分內(nèi)容疑似由AI輔助生成,瀏覽時請結(jié)合常識與多方信息審慎甄別。
平臺聲明:文章內(nèi)容(如有圖片或視頻亦包括在內(nèi))由作者上傳并發(fā)布,文章內(nèi)容僅代表作者本人觀點,簡書系信息發(fā)布平臺,僅提供信息存儲服務(wù)。

相關(guān)閱讀更多精彩內(nèi)容

友情鏈接更多精彩內(nèi)容