File should not use isprint ...

Christos Zoulas christos at zoulas.com
Tue Nov 29 20:22:35 EET 2005


On Nov 29,  5:14pm, rvokal at redhat.com (Radek Vokál) wrote:
-- Subject: Re: File should not use isprint ...

I think that the non-printable case was slightly broken; here is what
I am going to use:


Index: funcs.c
===================================================================
RCS file: /src/pub/file/src/funcs.c,v
retrieving revision 1.17
diff -u -u -r1.17 funcs.c
--- funcs.c	17 Oct 2005 19:03:34 -0000	1.17
+++ funcs.c	29 Nov 2005 18:21:33 -0000
@@ -30,6 +30,7 @@
 #include <stdlib.h>
 #include <string.h>
 #include <ctype.h>
+#include <wchar.h>
 
 #ifndef	lint
 FILE_RCSID("@(#)$Id: funcs.c,v 1.17 2005/10/17 19:03:34 christos Exp $")
@@ -152,6 +153,13 @@
 	return 0;
 }
 
+#define OCTALIFY(n, o)	\
+	*(n)++ = '\\', \
+	*(n)++ = (((uint32_t)*(o) >> 6) & 3) + '0', \
+	*(n)++ = (((uint32_t)*(o) >> 3) & 7) + '0', \
+	*(n)++ = (((uint32_t)*(o) >> 0) & 7) + '0', \
+	(o)++
+
 protected const char *
 file_getbuffer(struct magic_set *ms)
 {
@@ -174,14 +182,50 @@
 		ms->o.pbuf = nbuf;
 	}
 
+#if defined(HAVE_WCHAR_H) && defined(HAVE_MBRTOWC) && defined(HAVE_WCWIDTH)
+	{
+		mbstate_t state;
+		wchar_t nextchar;
+		int mb_conv = 1;
+		size_t bytesconsumed;
+		char *eop;
+		(void)memset(&state, 0, sizeof(mbstate_t));
+
+		np = ms->o.pbuf;
+		op = ms->o.buf;
+		eop = op + strlen(ms->o.buf);
+
+		while (op < eop) {
+			bytesconsumed = mbrtowc(&nextchar, op, eop - op,
+			    &state);
+			if (bytesconsumed == (size_t)(-1) ||
+			    bytesconsumed == (size_t)(-2)) {
+				mb_conv = 0;
+				break;
+			}
+
+			if (iswprint(nextchar) ) {
+				(void)memcpy(np, op, bytesconsumed);
+				op += bytesconsumed;
+				np += bytesconsumed;
+			} else {
+				while (bytesconsumed-- > 0)
+					OCTALIFY(np, op);
+			}
+		}
+		*np = '\0';
+
+		/* Parsing succeeded as a multi-byte sequence */
+		if (mb_conv != 0)
+			return ms->o.pbuf;
+	}
+#endif
+
 	for (np = ms->o.pbuf, op = ms->o.buf; *op; op++) {
 		if (isprint((unsigned char)*op)) {
 			*np++ = *op;	
 		} else {
-			*np++ = '\\';
-			*np++ = (((uint32_t)*op >> 6) & 3) + '0';
-			*np++ = (((uint32_t)*op >> 3) & 7) + '0';
-			*np++ = (((uint32_t)*op >> 0) & 7) + '0';
+			OCTALIFY(np, op);
 		}
 	}
 	*np = '\0';



More information about the File mailing list