dumx.c

by Marko Riedel markoriedelde@yahoo.de
My homepage is here.
Generated Sat May 13 05:00:57 2006.
/* find the n largest files and/or directories */
/* by Marko Riedel, markoriedelde@yahoo.de */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <math.h>
long double roundl(long double); /* for C90 compilers */

#include <assert.h>

#include <pwd.h>
#include <grp.h>

#define  __USE_LARGEFILE64
#include <sys/types.h>
#include <sys/stat.h>

#include <dirent.h>

#include <getopt.h>
#include <regex.h>


/* search parameters */

/* size of the result table when -n is not given on the command line */
#define NDEFAULT 10
/* short option letters passed to getopt_long_only(); 'n' takes an argument */
#define OPTS "vhn:"


/* The variables below are written by the option parser in main() and
   read by insert() and find(). */
char type = 0; /* may be set to 'f' or 'd': 'f' keeps directories out of the results, 'd' keeps files out */
int xdev = 0; /* don't descend into directories on other filesystems */
int v = 0; /* find smallest rather than largest items */
uid_t uid; int useUID = 0; /* restrict to one user; uid is only compared when useUID is 1 */
gid_t gid; int useGID = 0; /* restrict to one group; gid is only compared when useGID is 1 */

int pattern = 0; /* use regex: when 1, only paths matching reg are recorded */
int iregex = 0; /* when 1, main() adds REG_ICASE when compiling the pattern */
char *regtext = NULL; /* strdup'ed pattern text from -regex / -iregex */
regex_t reg; /* compiled form of regtext, built in main() when pattern is set */

/* string manipulation routines */

/* Growable path buffer. `string` is a pointer to this struct; instances are
   created by stringfromcstr() and released by stringfree(). */
typedef struct {
  char *buf;          /* NUL-terminated character data, heap-allocated */
  int length, alloc;  /* length: characters before the NUL; alloc: bytes allocated for buf */
} stringstruct, *string;

string stringfromcstr(char *b)
{
  string result = (string)malloc(sizeof(stringstruct));

  result->length = strlen(b);
  result->alloc = result->length+1;
  result->buf = (char *)malloc(result->alloc*sizeof(char));

  strcpy(result->buf, b);

  return result;
}

/*
 * Append "/" followed by ent to str, growing the buffer when needed.
 *
 * Growing may move the buffer, so any pointer previously read from
 * str->buf must be considered stale after this call.
 *
 * Returns the length str had before the append.
 *
 * If the buffer cannot be grown a message is written to stderr and the
 * program exits; the old buffer is never overwritten with a NULL result.
 */
int stringappendpathcomponent(string str, char *ent)
{
  int prevlen = str->length, nextlen = prevlen+1+strlen(ent);

  if(str->alloc<nextlen+1){
    /* keep the old pointer until realloc is known to have succeeded */
    char *grown = realloc(str->buf, (size_t)nextlen+1);

    if(grown==NULL){
      fprintf(stderr, "out of memory appending %s to %s\n", ent, str->buf);
      exit(EXIT_FAILURE);
    }

    str->buf = grown;
    str->alloc = nextlen+1;
  }

  str->buf[prevlen] = '/';
  strcpy(str->buf+prevlen+1, ent);
  str->length = nextlen;

  return prevlen;
}

/*
 * Shorten str to len characters by writing a NUL at position len.
 * len must lie between 0 and the current length (checked by assert).
 * The allocation is left untouched. Returns the length before the cut.
 */
int stringtruncate(string str, int len)
{
  const int before = str->length;

  assert((0 <= len) && (len<=str->length));

  str->buf[len] = 0;
  str->length = len;

  return before;
}

/*
 * Release a string and its character buffer.
 * Passing NULL is a no-op, mirroring free().
 */
void stringfree(string str)
{
  if(str==NULL){
    return;
  }

  free(str->buf);
  free(str);
}

/* the actual search */

/* One slot of the result table filled by insert(). */
typedef struct {
  char *name; /* strdup'ed path, or NULL while the slot is unused */
  unsigned long long size; /* permit large file sizes */
} sizedata;

/*
 * Offer one (path, size) candidate to the ranked result table.
 *
 * data holds n slots kept in rank order: largest first by default,
 * smallest first when the global v is 1. Unused slots have name==NULL
 * and sit at the end. A candidate that ranks behind every stored entry
 * of a full table is dropped; otherwise it is inserted at its rank and
 * the last entry falls off the end.
 *
 * A candidate whose path and size are already stored is ignored, so the
 * same item reached through overlapping search roots is listed once.
 *
 * path is copied; the table owns the copy. If the copy cannot be
 * allocated a message is written to stderr and the program exits.
 */
void insert(char *path, unsigned long long size, int n, sizedata *data)
{
  int k, j;

  /* first slot that is free or that the candidate ranks ahead of (ties included) */
  for(k=0; k<n; k++){
    if(data[k].name==NULL ||
       (!v && data[k].size<=size) ||
       (v==1 && data[k].size>=size)){
      break;
    }
  }

  if(k>=n){
    return;
  }

  /* The table is sorted, so entries of the same size form a contiguous
     run starting at k. Checking only slot k would miss a duplicate that
     sits behind another entry of equal size. */
  for(j=k; j<n && data[j].name!=NULL && data[j].size==size; j++){
    if(!strcmp(data[j].name, path)){
      return;
    }
  }

  /* copy before touching the table so a failure leaves it intact */
  char *copy = strdup(path);
  if(copy==NULL){
    fprintf(stderr, "out of memory recording %s\n", path);
    exit(EXIT_FAILURE);
  }

  if(data[k].name!=NULL){
    /* the last slot is pushed out by the shift; free(NULL) is harmless */
    free(data[n-1].name);
    memmove(data+k+1, data+k, (n-1-k)*sizeof(sizedata));
  }

  data[k].name = copy;
  data[k].size = size;
}

/*
 * Recursively total the disk usage below one directory and offer every
 * qualifying file and directory to the result table.
 *
 * path   directory to scan; an empty string stands for "/". The string
 *        is extended for each entry and cut back afterwards, so its
 *        length is unchanged on return, but path->buf may have moved.
 * n,data result table, passed through to insert().
 * dbuf   lstat64() data of the directory itself; supplies its own block
 *        count, its owner/group for the filters and its device for xdev.
 *
 * Regular files count st_blocks*512 bytes and symbolic links count 0.
 * Other non-directory entries are neither counted nor recorded. A file
 * with several hard links is recorded with its full size but adds only
 * size/st_nlink to the directory total.
 *
 * Returns the directory total in bytes, or 0 if the directory could not
 * be opened (an error is printed to stderr in that case).
 */
unsigned long long find(string path, int n, sizedata *data, struct stat64 dbuf)
{
  char *openit = (path->length>0 ? path->buf : "/");
  DIR *current;

  if((current=opendir(openit))==NULL){
    fprintf(stderr, "couldn't open %s (%d): ", openit, __LINE__);
    perror(NULL);

    return 0;
  }

  unsigned long long total = 0; int prevlen = path->length;

  struct dirent *entry;
  while((entry=readdir(current))!=NULL){
    char *name = entry->d_name;

    /* skip empty names, "." and ".." */
    if(!name[0] ||
       (!name[1] && name[0]=='.') ||
       (!name[2] && name[0]=='.' && name[1]=='.')){
      continue;
    }

    stringappendpathcomponent(path, name);

    struct stat64 buf;
    if(lstat64(path->buf, &buf)==-1){
      fprintf(stderr, "couldn't stat %s (%d): ", path->buf, __LINE__);
      perror(NULL);
    }
    else{
      unsigned long long size = 0;

      if(S_ISREG(buf.st_mode) || S_ISLNK(buf.st_mode)){
        int fileinsert = 1;
        if(type=='d' ||
           (useUID==1 && buf.st_uid!=uid) ||
           (useGID==1 && buf.st_gid!=gid) ||
           (pattern==1 &&
            regexec(&reg, path->buf, 0, NULL, 0)==REG_NOMATCH)){
          fileinsert = 0;
        }

        if(S_ISREG(buf.st_mode)){
          size = buf.st_blocks*512;
        }
        if(fileinsert){
          insert(path->buf, size, n, data);
        }
        /* share the blocks among all links when adding to the total */
        if(buf.st_nlink>1){
          size /= buf.st_nlink;
        }
      }
      else if(S_ISDIR(buf.st_mode)){
        if(!xdev || buf.st_dev==dbuf.st_dev){
          size = find(path, n, data, buf);
        }
      }

      total += size;
    }

    /* drop the component appended above */
    stringtruncate(path, prevlen);
  }

  closedir(current);

  /* Recompute the display name here: path->buf may have been reallocated
     inside the loop, so openit can no longer be trusted. The same name is
     used for the regex test and for the table entry, so the root
     directory is matched as "/" rather than as an empty string. */
  char *dirpath = (path->length>0 ? path->buf : "/");

  int dirinsert = 1;
  if(type=='f' ||
     (useUID==1 && dbuf.st_uid!=uid) ||
     (useGID==1 && dbuf.st_gid!=gid) ||
     (pattern==1 &&
      regexec(&reg, dirpath, 0, NULL, 0)==REG_NOMATCH)){
    dirinsert = 0;
  }

  total += dbuf.st_blocks*512;
  if(dirinsert){
    insert(dirpath, total, n, data);
  }
  return total;
}

/* output search results */

#define SFX 8

/*
 * Print one result line: a left-justified size column followed by name.
 *
 * hflag==0  the size is printed as a plain byte count, 24 columns wide.
 * hflag!=0  the size is printed in an 8-column field. Values below 1024
 *           are printed as is; larger values are divided by 1024 until
 *           they drop below 1024 and get the matching suffix (K, M, G,
 *           ...). Scaled values of 100 or more print without decimals,
 *           10 to 100 with at most one, below 10 with at most two;
 *           trailing zero decimals are dropped.
 *
 * Decimals are derived from an integer count of tenths or hundredths
 * rather than from floating-point equality tests, which can misfire
 * after the divide.
 */
void output(char *name, unsigned long long size, int hflag)
{
  static const char suffix[SFX] = { 'K', 'M', 'G', 'T', 'P', 'E', 'Z', 'Y' };

  if(!hflag){
    printf("%-24llu", size);
    puts(name);
    return;
  }

  long double val = (long double)size;
  int s = -1;

  /* stop at the last suffix: s must stay a valid index into suffix[] */
  while(val>=1024 && s<SFX-1){
    val /= 1024;
    s++;
  }

  if(s==-1){
    printf("%-8llu", size);
  }
  else{
    char buf[64];

    if(val>=100){
      snprintf(buf, sizeof buf, "%llu%c", (unsigned long long)val, suffix[s]);
    }
    else{
      /* one decimal from 10 up, two below; adding 0.5 rounds to nearest */
      unsigned long long scale = (val>=10 ? 10 : 100);
      unsigned long long scaled = (unsigned long long)(val*scale + 0.5L);
      unsigned long long whole = scaled/scale, frac = scaled%scale;

      if(frac==0){
        snprintf(buf, sizeof buf, "%llu%c", whole, suffix[s]);
      }
      else if(scale==10){
        snprintf(buf, sizeof buf, "%llu.%llu%c", whole, frac, suffix[s]);
      }
      else if(frac%10==0){
        /* hundredths ending in zero: show a single decimal */
        snprintf(buf, sizeof buf, "%llu.%llu%c", whole, frac/10, suffix[s]);
      }
      else{
        snprintf(buf, sizeof buf, "%llu.%02llu%c", whole, frac, suffix[s]);
      }
    }

    printf("%-8s", buf);
  }

  puts(name);
}

/* Option-parser state used by main(): optarg is read for option
   arguments, optind marks where the path arguments start, and opterr is
   set to 0 there, with unrecognised options routed to usage(). */
extern char *optarg;
extern int optind, opterr, optopt;

/*
 * Write the command-line synopsis to stderr and terminate with exit(-1).
 * prog is the program name shown in the synopsis; the default for -n is
 * taken from NDEFAULT. Does not return.
 */
void usage(char *prog)
{
  FILE *err = stderr;

  fprintf(err, "   %s [-v] [-h] [-n <number>] path1 path2 ...  ,\n\n", prog);

  fputs("   finds the largest files/directories in path1, path2, etc.,\n"
        "   where -h is for human-readable output,\n"
        "   -v inverts (find smallest)\n", err);

  fprintf(err,
          "   and -n sets the number of files to return, default is %d.\n\n",
          NDEFAULT);

  fputs("   Long options:\n"
        "       -type <f|d> files/directories only\n"
        "       -xdev don't descend into other file systems\n"
        "       -uid <number> restrict to given user id\n"
        "       -gid <number> restrict to given group id\n"
        "       -user <name> restrict to given user name\n"
        "       -group <name> restrict to given group name\n"
        "       -regex <pattern> restrict to files matching pattern.\n"
        "       -iregex <pattern> like -regex, but case-insensitive.\n", err);

  exit(-1);
}

/*
 * qsort() comparator for an array of C-string pointers: a and b each
 * point at a char * element, and the strings are ordered with strcmp().
 */
int comparestrptrs(const void *a, const void *b)
{
  const char *left = *(const char **)a;
  const char *right = *(const char **)b;

  return strcmp(left, right);
}

/*
 * Entry point: parse options, search every path argument (or "." when
 * none is given) and print the ranked result table.
 *
 * Short options -v, -h and -n <number> and the long options listed in
 * long_options set the global search parameters. Path arguments are
 * sorted so that an argument repeated verbatim, or repeated with one
 * trailing '/', is searched only once. Regular files and symbolic links
 * named on the command line are recorded directly; directories are
 * searched with find().
 *
 * Exits with a negative status on bad options or a bad regex, with
 * EXIT_FAILURE when memory runs out, and returns 0 otherwise.
 */
int main(int argc, char **argv)
{
  int hflag = 0, n = NDEFAULT;

  opterr = 0;
  int option_index = 0;
  static struct option long_options[] = {
    {"type", required_argument, NULL, 0}, /* may be 'f' or 'd' */
    {"xdev", no_argument, NULL, 0}, /* don't descend into other file systems */
    {"uid", required_argument, NULL, 0}, /* user id */
    {"gid", required_argument, NULL, 0}, /* group id */
    {"user", required_argument, NULL, 0}, /* user name */
    {"group", required_argument, NULL, 0}, /* group name */
    {"regex", required_argument, NULL, 0}, /* regular expression */
    {"iregex", required_argument, NULL, 0}, /* case-insensitive regex */
    {0, 0, 0, 0}
  };

  /* must be int: with an unsigned plain char the -1 end marker would
     never compare equal and the loop would not terminate correctly */
  int c;
  while((c=getopt_long_only(argc, argv, OPTS, long_options, &option_index))!=-1){
    switch(c){
    case 0:
      /* a long option; option_index is its position in long_options */
      switch(option_index){
      case 0:
        if(!strcmp(optarg, "f")){
          type = 'f';
        }
        else if(!strcmp(optarg, "d")){
          type = 'd';
        }
        else{
          fprintf(stderr, "illegal type %s, need 'f' or 'd'\n", optarg);
          exit(-2);
        }
        break;
      case 1:
        xdev = 1;
        break;
      case 2:
        useUID = 1;
        uid = atoi(optarg);
        break;
      case 3:
        useGID = 1;
        gid = atoi(optarg);
        break;
      case 4: {
        struct passwd *pw = getpwnam(optarg);
        if(pw==NULL){
          fprintf(stderr, "no user named %s\n", optarg);
          exit(-3);
        }
        useUID = 1;
        uid = pw->pw_uid;
        break;
      }
      case 5: {
        struct group *gr = getgrnam(optarg);
        if(gr==NULL){
          fprintf(stderr, "no group named %s\n", optarg);
          exit(-4);
        }
        useGID = 1;
        gid = gr->gr_gid;
        break;
      }
      case 6:
      case 7:
        /* a later -regex/-iregex replaces an earlier pattern */
        free(regtext);
        regtext = strdup(optarg);
        if(regtext==NULL){
          fprintf(stderr, "out of memory storing regex \"%s\"\n", optarg);
          exit(EXIT_FAILURE);
        }
        pattern = 1;
        if(option_index==7){
          iregex = 1;
        }
        break;
      default:
        fprintf(stderr, "option parser in illegal state\n");
        exit(-5);
      }
      break;
    case 'h':
      hflag = 1;
      break;
    case 'v':
      v = 1;
      break;
    case 'n':
      n = atoi(optarg);
      if(n<1){
        fprintf(stderr,
                "number of files/directories must be positive, "
                "got %d\n", n);
        exit(-6);
      }
      break;
    default:
      usage(argv[0]);
    }
  }

  if(pattern){
    int errcode; int regflags = REG_EXTENDED | REG_NOSUB;
    if(iregex){
      regflags |= REG_ICASE;
    }
    if((errcode = regcomp(&reg, regtext, regflags))){
      /* first call asks for the message size, second call fills the buffer */
      int errlen = regerror(errcode, &reg, NULL, 0);
      char errbuf[errlen];

      regerror(errcode, &reg, errbuf, errlen);

      fprintf(stderr, "error compiling regex \"%s\": %s\n",
              regtext, errbuf);
      exit(-7);
    }
  }

  sizedata *data = malloc(n*sizeof *data);
  if(data==NULL){
    fprintf(stderr, "out of memory allocating %d result slots\n", n);
    exit(EXIT_FAILURE);
  }

  int k;

  /* name==NULL marks a slot as unused */
  for(k=0; k<n; k++){
    data[k].name = NULL;
    data[k].size = 0;
  }

  string path; struct stat64 buf;

  if(optind==argc){
    /* no path arguments: search the current directory */
    if(lstat64(".", &buf)==-1){
      fprintf(stderr, "couldn't stat . (%d): ", __LINE__);
      perror(NULL);
    }
    else{
      path = stringfromcstr(".");
      find(path, n, data, buf);
      stringfree(path);
    }
  }
  else{
    /* sorting puts repeated arguments next to each other */
    qsort(argv+optind, argc-optind, sizeof(char *), comparestrptrs);

    for(k=optind; k<argc; k++){
      if(k>optind){
        int len1 = strlen(argv[k-1]), len2 = strlen(argv[k]);

        /* skip an argument equal to its predecessor, or equal to it
           plus one trailing '/' */
        if(len1==len2 || (len1==len2-1 && argv[k][len2-1]=='/')){
          if(!strncmp(argv[k-1], argv[k], len1)){
            continue;
          }
        }
      }

      if(lstat64(argv[k], &buf)==-1){
        fprintf(stderr, "couldn't stat %s (%d): ", argv[k], __LINE__);
        perror(NULL);
        continue;
      }

      if(S_ISREG(buf.st_mode)){
        insert(argv[k], buf.st_blocks*512, n, data);
        continue;
      }
      if(S_ISLNK(buf.st_mode)){
        insert(argv[k], 0, n, data);
        continue;
      }

      if(!S_ISDIR(buf.st_mode)){
        fprintf(stderr, "not a directory, file or link (%d): %s\n",
                __LINE__, argv[k]);
        continue;
      }

      path = stringfromcstr(argv[k]);
      /* drop one trailing '/'; "/" becomes "", which find() reads as the root */
      if(path->buf[path->length-1]=='/'){
        stringtruncate(path, path->length-1);
      }
      find(path, n, data, buf);
      stringfree(path);
    }
  }

  if(pattern){
    regfree(&reg);
  }

  /* used slots are contiguous from the start; stop at the first unused one */
  for(k=0; k<n; k++){
    if(data[k].name==NULL){
      break;
    }

    output(data[k].name, data[k].size, hflag);
    free(data[k].name);
  }

  free(data);

  return 0;
}

markoriedelde@yahoo.de