/* find the n largest files and/or directories */
/* by Marko Riedel, markoriedelde@yahoo.de */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
long double roundl(long double); /* for C90 compilers */
#include <assert.h>
#include <errno.h>
#include <limits.h>
#include <pwd.h>
#include <grp.h>
#define __USE_LARGEFILE64
#include <sys/types.h>
#include <sys/stat.h>
#include <dirent.h>
#include <getopt.h>
#include <regex.h>
/* search parameters: set by the option parsing in main, read by the search */
#define NDEFAULT 10 /* number of results when -n is not given */
#define OPTS "vhn:" /* short options: -v, -h, and -n which takes an argument */
char type = 0; /* 0 = no restriction; may be set to 'f' or 'd' */
int xdev = 0; /* don't descend into directories on other filesystems */
int v = 0; /* find smallest rather than largest items */
uid_t uid; int useUID = 0; /* restrict to one user; uid is meaningful only when useUID is 1 */
gid_t gid; int useGID = 0; /* restrict to one group; gid is meaningful only when useGID is 1 */
int pattern = 0; /* use regex: 1 once -regex or -iregex was given */
int iregex = 0; /* 1 = compile the regex with REG_ICASE */
char *regtext = NULL; /* heap copy of the regex source text */
regex_t reg; /* compiled form of regtext; valid only when pattern is set */
/* string manipulation routines */
/*
 * Growable character buffer used to build paths.
 * `string` is a pointer to one of these records.
 */
typedef struct {
    char *buf;   /* heap storage holding a NUL-terminated C string */
    int length;  /* characters in use, not counting the terminator */
    int alloc;   /* bytes allocated for buf */
} stringstruct, *string;
string stringfromcstr(char *b)
{
string result = (string)malloc(sizeof(stringstruct));
result->length = strlen(b);
result->alloc = result->length+1;
result->buf = (char *)malloc(result->alloc*sizeof(char));
strcpy(result->buf, b);
return result;
}
/*
 * Append "/" followed by ent to str, growing the buffer when it is too small.
 * Returns the length str had before the call; passing that value to
 * stringtruncate() undoes the append.
 * Running out of memory is reported on stderr and ends the program.
 */
int stringappendpathcomponent(string str, char *ent)
{
    int prevlen = str->length;
    size_t entlen = strlen(ent);
    int nextlen = prevlen+1+(int)entlen;
    if(str->alloc<nextlen+1){
        /* keep the old pointer until realloc is known to have succeeded */
        char *grown = realloc(str->buf, (size_t)nextlen+1);
        if(grown==NULL){
            perror("realloc");
            exit(EXIT_FAILURE);
        }
        str->buf = grown;
        str->alloc = nextlen+1;
    }
    str->buf[prevlen] = '/';
    memcpy(str->buf+prevlen+1, ent, entlen+1); /* includes the terminator */
    str->length = nextlen;
    return prevlen;
}
/*
 * Shorten str to its first len characters and terminate it there.
 * len has to lie between 0 and the current length; assert() enforces this.
 * Returns the length the string had before the call.
 */
int stringtruncate(string str, int len)
{
    const int oldlength = str->length;

    assert((0 <= len) && (len<=str->length));
    str->buf[len] = '\0';
    str->length = len;
    return oldlength;
}
/*
 * Release a string and the buffer it owns.
 * A NULL argument is accepted and ignored, mirroring free().
 */
void stringfree(string str)
{
    if(str==NULL){
        return;
    }
    free(str->buf);
    free(str);
}
/* the actual search */
/* one slot of the result table filled by insert() */
typedef struct {
char *name; /* heap copy of the path; NULL marks an unused slot */
unsigned long long size; /* permit large file sizes */
} sizedata;
/*
 * Offer the item (path, size) to the result table data[0..n-1].
 *
 * The table is kept ordered, largest first, or smallest first when the
 * global v is 1.  Unused slots have name==NULL and sit at the end.  The new
 * item is placed in front of the first entry that does not rank strictly
 * ahead of it; later entries move down one slot and the last one is
 * discarded.  An item that ranks behind a full table is dropped.
 *
 * A (path, size) pair that is already listed is not added again.  path is
 * copied, so the caller keeps ownership of its buffer.  Running out of
 * memory is reported on stderr and ends the program.
 */
void insert(char *path, unsigned long long size, int n, sizedata *data)
{
    int k;
    for(k=0; k<n; k++){
        if(data[k].name==NULL ||
           (!v && data[k].size<=size) ||
           (v==1 && data[k].size>=size)){
            break;
        }
    }
    if(k>=n){
        return;
    }
    /*
     * Because the table is ordered, every entry of exactly this size sits in
     * one run starting at k.  Check the whole run: the same path may be
     * listed behind a different item of equal size.
     */
    int j;
    for(j=k; j<n && data[j].name!=NULL && data[j].size==size; j++){
        if(!strcmp(data[j].name, path)){
            return;
        }
    }
    /* copy first: a NULL name would otherwise read as an unused slot */
    char *copy = strdup(path);
    if(copy==NULL){
        perror("strdup");
        exit(EXIT_FAILURE);
    }
    if(data[k].name!=NULL){
        /* make room: the last entry falls off, the rest moves down */
        free(data[n-1].name);
        memmove(data+k+1, data+k, (size_t)(n-1-k)*sizeof(sizedata));
    }
    data[k].name = copy;
    data[k].size = size;
}
unsigned long long find(string path, int n, sizedata *data, struct stat64 dbuf)
{
char *openit = (path->length>0 ? path->buf : "/");
DIR *current;
if((current=opendir(openit))==NULL){
fprintf(stderr, "couldn't open %s (%d): ", openit, __LINE__);
perror(NULL);
return 0;
}
unsigned long long total = 0; int prevlen = path->length;
struct dirent *entry;
while((entry=readdir(current))!=NULL){
char *name = entry->d_name;
if(!name[0] ||
(!name[1] && name[0]=='.') ||
(!name[2] && name[0]=='.' && name[1]=='.')){
continue;
}
stringappendpathcomponent(path, name);
struct stat64 buf;
if(lstat64(path->buf, &buf)==-1){
fprintf(stderr, "couldn't stat %s (%d): ", path->buf, __LINE__);
perror(NULL);
}
else{
unsigned long long size = 0;
if(S_ISREG(buf.st_mode) || S_ISLNK(buf.st_mode)){
int fileinsert = 1;
if(type=='d' ||
(useUID==1 && buf.st_uid!=uid) ||
(useGID==1 && buf.st_gid!=gid) ||
(pattern==1 &&
regexec(®, path->buf, 0, NULL, 0)==REG_NOMATCH)){
fileinsert = 0;
}
if(S_ISREG(buf.st_mode)){
size = buf.st_blocks*512;
}
if(fileinsert){
insert(path->buf, size, n, data);
}
if(buf.st_nlink>1){
size /= buf.st_nlink;
}
}
else if(S_ISDIR(buf.st_mode)){
if(!xdev || buf.st_dev==dbuf.st_dev){
size = find(path, n, data, buf);
}
}
total += size;
}
stringtruncate(path, prevlen);
}
closedir(current);
int dirinsert = 1;
if(type=='f' ||
(useUID==1 && dbuf.st_uid!=uid) ||
(useGID==1 && dbuf.st_gid!=gid) ||
(pattern==1 &&
regexec(®, path->buf, 0, NULL, 0)==REG_NOMATCH)){
dirinsert = 0;
}
total += dbuf.st_blocks*512;
if(dirinsert){
char *dirpath = (path->length>0 ? path->buf : "/");
insert(dirpath, total, n, data);
}
return total;
}
/* output search results */
#define SFX 8
/*
 * Print one result line on stdout: the size in a left-justified field,
 * then name and a newline.
 *
 * hflag==0  exact byte count in a 24-column field.
 * hflag!=0  8-column field.  Sizes below 1024 are printed as they are.
 *           Larger sizes are divided by 1024 until they drop below 1024
 *           and get the matching suffix (K, M, G, ...).  The value shows up
 *           to three significant digits: no decimals from 100 on, one
 *           decimal from 10 on, two decimals below 10.  Trailing zero
 *           decimals are left out.
 *
 * The decimals are worked out on integer tenths/hundredths so that the
 * choice of format never hinges on an exact floating-point comparison.
 */
void output(char *name, unsigned long long size, int hflag)
{
    static const char suffix[SFX] = { 'K', 'M', 'G', 'T', 'P', 'E', 'Z', 'Y' };

    if(!hflag){
        printf("%-24llu", size);
        puts(name);
        return;
    }
    long double val = (long double)size;
    int s = -1;
    /* stop at the last suffix so that suffix[s] stays inside the array */
    while(val>=1024 && s<SFX-1){
        val /= 1024;
        s++;
    }
    if(s==-1){
        printf("%-8llu", size);
        puts(name);
        return;
    }
    char buf[64];
    if(val>=100){
        snprintf(buf, sizeof buf, "%lu%c", (unsigned long)val, suffix[s]);
    }
    else if(val>=10){
        /* adding 0.5 before the cast rounds to the nearest tenth */
        unsigned long tenths = (unsigned long)(val*10+0.5L);
        if(tenths%10==0){
            snprintf(buf, sizeof buf, "%lu%c", tenths/10, suffix[s]);
        }
        else{
            snprintf(buf, sizeof buf, "%lu.%lu%c",
                     tenths/10, tenths%10, suffix[s]);
        }
    }
    else{
        unsigned long hundredths = (unsigned long)(val*100+0.5L);
        if(hundredths%100==0){
            snprintf(buf, sizeof buf, "%lu%c", hundredths/100, suffix[s]);
        }
        else if(hundredths%10==0){
            snprintf(buf, sizeof buf, "%lu.%lu%c",
                     hundredths/100, (hundredths/10)%10, suffix[s]);
        }
        else{
            snprintf(buf, sizeof buf, "%lu.%02lu%c",
                     hundredths/100, hundredths%100, suffix[s]);
        }
    }
    printf("%-8s", buf);
    puts(name);
}
/* option-parser state used by main: optarg is read as the argument of the
   current option, optind as the index of the first path argument, and
   opterr is set to 0 before parsing starts */
extern char *optarg;
extern int optind, opterr, optopt;
/*
 * Write the command-line help to stderr and leave the program with
 * exit(-1).  prog is the program name shown in the synopsis line.
 * This function does not return.
 */
void usage(char *prog)
{
    FILE *out = stderr;

    /* synopsis */
    fprintf(out, " %s [-v] [-h] [-n <number>] path1 path2 ... ,\n\n", prog);

    /* short options */
    fputs(" finds the largest files/directories in path1, path2, etc.,\n", out);
    fputs(" where -h is for human-readable output,\n", out);
    fputs(" -v inverts (find smallest)\n", out);
    fprintf(out,
            " and -n sets the number of files to return, default is %d.\n\n",
            NDEFAULT);

    /* long options */
    fputs(" Long options:\n", out);
    fputs(" -type <f|d> files/directories only\n", out);
    fputs(" -xdev don't descend into other file systems\n", out);
    fputs(" -uid <number> restrict to given user id\n", out);
    fputs(" -gid <number> restrict to given group id\n", out);
    fputs(" -user <name> restrict to given user name\n", out);
    fputs(" -group <name> restrict to given group name\n", out);
    fputs(" -regex <pattern> restrict to files matching pattern.\n", out);
    fputs(" -iregex <pattern> like -regex, but case-insensitive.\n", out);

    exit(-1);
}
/*
 * qsort() comparison callback for an array of C strings.
 * a and b each point at an array element (itself a char pointer); the
 * strings those elements refer to are ordered with strcmp().
 */
int comparestrptrs(const void *a, const void *b)
{
    const char *const *lhs = a;
    const char *const *rhs = b;

    return strcmp(*lhs, *rhs);
}
int main(int argc, char **argv)
{
int hflag = 0, n = NDEFAULT;
opterr = 0;
int option_index = 0;
static struct option long_options[] = {
{"type", required_argument, NULL, 0}, /* may be 'f' or 'd' */
{"xdev", no_argument, NULL, 0}, /* don't descend into other file systems */
{"uid", required_argument, NULL, 0}, /* user id */
{"gid", required_argument, NULL, 0}, /* group id */
{"user", required_argument, NULL, 0}, /* user name */
{"group", required_argument, NULL, 0}, /* group name */
{"regex", required_argument, NULL, 0}, /* regular expression */
{"iregex", required_argument, NULL, 0}, /* case-insensitive regex */
{0, 0, 0, 0}
};
char c;
while((c=getopt_long_only(argc, argv, OPTS, long_options, &option_index))!=-1){
switch(c){
case 0:
switch(option_index){
case 0:
if(!strcmp(optarg, "f")){
type = 'f';
}
else if(!strcmp(optarg, "d")){
type = 'd';
}
else{
fprintf(stderr, "illegal type %s, need 'f' or 'd'\n", optarg);
exit(-2);
}
break;
case 1:
xdev = 1;
break;
case 2:
useUID = 1;
uid = atoi(optarg);
break;
case 3:
useGID = 1;
gid = atoi(optarg);
break;
case 4: {
struct passwd *pw = getpwnam(optarg);
if(pw==NULL){
fprintf(stderr, "no user named %s\n", optarg);
exit(-3);
}
useUID = 1;
uid = pw->pw_uid;
break;
}
case 5: {
struct group *gr = getgrnam(optarg);
if(gr==NULL){
fprintf(stderr, "no group named %s\n", optarg);
exit(-4);
}
useGID = 1;
gid = gr->gr_gid;
break;
}
case 6:
case 7:
if(regtext!=NULL){
free(regtext);
}
regtext = strdup(optarg);
pattern = 1;
if(option_index==7){
iregex = 1;
}
break;
default:
fprintf(stderr, "option parser in illegal state\n");
exit(-5);
}
break;
case 'h':
hflag = 1;
break;
case 'v':
v = 1;
break;
case 'n':
n = atoi(optarg);
if(n<1){
fprintf(stderr,
"number of files/directories must be positive, "
"got %d\n", n);
exit(-6);
}
break;
default:
usage(argv[0]);
}
}
if(pattern){
int errcode; int regflags = REG_EXTENDED | REG_NOSUB;
if(iregex){
regflags |= REG_ICASE;
}
if((errcode = regcomp(®, regtext, regflags))){
int errlen = regerror(errcode, ®, NULL, 0);
char errbuf[errlen];
regerror(errcode, ®, errbuf, errlen);
fprintf(stderr, "error compiling regex \"%s\": %s\n",
regtext, errbuf);
exit(-7);
}
}
sizedata *data = (sizedata *)malloc(n*sizeof(sizedata));
int k;
for(k=0; k<n; k++){
data[k].name = NULL;
data[k].size = 0;
}
string path; struct stat64 buf;
if(optind==argc){
if(lstat64(".", &buf)==-1){
fprintf(stderr, "couldn't stat . (%d): ", __LINE__);
perror(NULL);
}
else{
path = stringfromcstr(".");
find(path, n, data, buf);
stringfree(path);
}
}
else{
qsort(argv+optind, argc-optind, sizeof(char *), comparestrptrs);
for(k=optind; k<argc; k++){
if(k>optind){
int len1 = strlen(argv[k-1]), len2 = strlen(argv[k]);
if(len1==len2 || (len1==len2-1 && argv[k][len2-1]=='/')){
if(!strncmp(argv[k-1], argv[k], len1)){
continue;
}
}
}
if(lstat64(argv[k], &buf)==-1){
fprintf(stderr, "couldn't stat %s (%d): ", argv[k], __LINE__);
perror(NULL);
continue;
}
if(S_ISREG(buf.st_mode)){
insert(argv[k], buf.st_blocks*512, n, data);
continue;
}
if(S_ISLNK(buf.st_mode)){
insert(argv[k], 0, n, data);
continue;
}
if(!S_ISDIR(buf.st_mode)){
fprintf(stderr, "not a directory, file or link (%d): %s\n",
__LINE__, argv[k]);
continue;
}
path = stringfromcstr(argv[k]);
if(path->buf[path->length-1]=='/'){
stringtruncate(path, path->length-1);
}
find(path, n, data, buf);
stringfree(path);
}
}
if(pattern){
regfree(®);
}
for(k=0; k<n; k++){
if(data[k].name==NULL){
break;
}
output(data[k].name, data[k].size, hflag);
free(data[k].name);
}
free(data);
return 0;
}