r1372 - in trunk: diffutils grep linux

matthew at linuxfromscratch.org matthew at linuxfromscratch.org
Wed Jan 25 13:40:52 PST 2006


Author: matthew
Date: 2006-01-25 14:40:50 -0700 (Wed, 25 Jan 2006)
New Revision: 1372

Added:
   trunk/diffutils/diffutils-2.8.1-i18n-1.patch
   trunk/grep/grep-2.5.1a-redhat_fixes-2.patch
   trunk/linux/linux-2.6.12.5-utf8_input-2.patch
Log:
Add remaining i18n related patches from Alexander Patrakov

Added: trunk/diffutils/diffutils-2.8.1-i18n-1.patch
===================================================================
--- trunk/diffutils/diffutils-2.8.1-i18n-1.patch	                        (rev 0)
+++ trunk/diffutils/diffutils-2.8.1-i18n-1.patch	2006-01-25 21:40:50 UTC (rev 1372)
@@ -0,0 +1,802 @@
+Submitted by: Alexander E. Patrakov
+Date: 2005-08-13
+Initial Package Version: 2.8.1
+Upstream Status: Unknown, but required for LSB >= 2.0 certification
+Origin: RedHat
+Description: Fixes treatment of whitespace in multibyte locales.
+
+--- diffutils-2.8.4/src/diff.c.i18n	2002-06-17 01:55:42.000000000 -0400
++++ diffutils-2.8.4/src/diff.c	2002-11-16 18:41:37.000000000 -0500
+@@ -275,6 +275,13 @@
+   re_set_syntax (RE_SYNTAX_GREP | RE_NO_POSIX_BACKTRACKING);
+   excluded = new_exclude ();
+ 
++#ifdef HANDLE_MULTIBYTE
++  if (MB_CUR_MAX > 1)
++    lines_differ = lines_differ_multibyte;
++  else
++#endif
++    lines_differ = lines_differ_singlebyte;
++
+   /* Decode the options.  */
+ 
+   while ((c = getopt_long (argc, argv, shortopts, longopts, 0)) != -1)
+--- diffutils-2.8.4/src/diff.h.i18n	2002-11-16 18:31:32.000000000 -0500
++++ diffutils-2.8.4/src/diff.h	2002-11-16 18:48:58.000000000 -0500
+@@ -23,6 +23,19 @@
+ #include "system.h"
+ #include <stdio.h>
+ 
++/* For platforms which support the ISO C Amendment 1 functionality we
++   support user defined character classes.  */
++#if defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H
++/* Solaris 2.5 has a bug: <wchar.h> must be included before <wctype.h>.  */
++# include <wchar.h>
++# include <wctype.h>
++# if defined (HAVE_MBRTOWC)
++#  define HANDLE_MULTIBYTE      1
++# endif
++#endif
++
++#define TAB_WIDTH 8
++
+ /* What kind of changes a hunk contains.  */
+ enum changes
+ {
+@@ -350,7 +363,13 @@
+ extern char const pr_program[];
+ char *concat (char const *, char const *, char const *);
+ char *dir_file_pathname (char const *, char const *);
+-bool lines_differ (char const *, char const *);
++
++bool (*lines_differ) (char const *, char const *);
++bool lines_differ_singlebyte (char const *, char const *);
++#ifdef HANDLE_MULTIBYTE
++bool lines_differ_multibyte (char const *, char const *);
++#endif
++
+ lin translate_line_number (struct file_data const *, lin);
+ struct change *find_change (struct change *);
+ struct change *find_reverse_change (struct change *);
+--- diffutils-2.8.4/src/io.c.i18n	2002-06-11 02:06:32.000000000 -0400
++++ diffutils-2.8.4/src/io.c	2002-11-16 18:57:30.000000000 -0500
+@@ -26,6 +26,7 @@
+ #include <regex.h>
+ #include <setmode.h>
+ #include <xalloc.h>
++#include <assert.h>
+ 
+ /* Rotate an unsigned value to the left.  */
+ #define ROL(v, n) ((v) << (n) | (v) >> (sizeof (v) * CHAR_BIT - (n)))
+@@ -213,6 +214,28 @@
+ 

+ /* Split the file into lines, simultaneously computing the equivalence
+    class for each line.  */
++#ifdef HANDLE_MULTIBYTE
++# define MBC2WC(P, END, MBLENGTH, WC, STATE, CONVFAIL)			\
++do									\
++{									\
++    mbstate_t state_bak = STATE;					\
++									\
++    CONVFAIL = 0;							\
++    MBLENGTH = mbrtowc (&WC, P, END - (char const *)P, &STATE);		\
++									\
++    switch (MBLENGTH)							\
++      {									\
++      case (size_t)-2:							\
++      case (size_t)-1:							\
++	STATE = state_bak;						\
++	++CONVFAIL;							\
++	  /* Fall through. */						\
++      case 0:								\
++	MBLENGTH = 1;							\
++      }									\
++}									\
++while (0)
++#endif
+ 
+ static void
+ find_and_hash_each_line (struct file_data *current)
+@@ -239,12 +262,280 @@
+   bool same_length_diff_contents_compare_anyway =
+     diff_length_compare_anyway | ignore_case;
+ 
++#ifdef HANDLE_MULTIBYTE
++  wchar_t   wc;
++  size_t    mblength;
++  mbstate_t state;
++  int       convfail;
++  
++  memset (&state, '\0', sizeof (mbstate_t));
++#endif
++
+   while ((char const *) p < suffix_begin)
+     {
+       char const *ip = (char const *) p;
+ 
+       h = 0;
++#ifdef HANDLE_MULTIBYTE
++      if (MB_CUR_MAX > 1)
++	{
++	  wchar_t   lo_wc;
++	  char	    mbc[MB_LEN_MAX];
++	  mbstate_t state_wc;
++
++	  /* Hash this line until we find a newline.  */
++	  switch (ignore_white_space)
++	    {
++	    case IGNORE_ALL_SPACE:
++	      while (1)
++		{
++		  if (*p == '\n')
++		    {
++		      ++p;
++		      break;
++		    }
++
++		  MBC2WC (p, suffix_begin, mblength, wc, state, convfail);
++
++		  if (convfail)
++		    mbc[0] = *p++;
++		  else if (!iswspace (wc))
++		    {
++		      bool flag = 0;
++
++		      if (ignore_case)
++			{
++			  lo_wc = towlower (wc);
++			  if (lo_wc != wc)
++			    {
++			      flag = 1;
++
++			      p += mblength;
++			      memset (&state_wc, '\0', sizeof(mbstate_t));
++			      mblength = wcrtomb (mbc, lo_wc, &state_wc);
++
++			      assert (mblength != (size_t)-1 &&
++				  mblength != (size_t)-2);
++
++			      mblength = (mblength < 1) ? 1 : mblength;
++			    }
++			}
++
++		      if (!flag)
++			{
++			  for (i = 0; i < mblength; i++)
++			    mbc[i] =  *p++;
++			}
++		    }
++		  else
++		    {
++		      p += mblength;
++		      continue;
++		    }
++
++		  for (i = 0; i < mblength; i++)
++		    h = HASH (h, mbc[i]);
++		}
++	      break;
++
++	    case IGNORE_SPACE_CHANGE:
++	      while (1)
++		{
++		  if (*p == '\n')
++		    {
++		      ++p;
++		      break;
++		    }
+ 
++		  MBC2WC (p, suffix_begin, mblength, wc, state, convfail);
++
++		  if (!convfail && iswspace (wc))
++		    {
++		      while (1)
++			{
++			  if (*p == '\n')
++			    {
++			      ++p;
++			      goto hashing_done;
++			    }
++
++			  p += mblength;
++			  MBC2WC (p, suffix_begin, mblength, wc, state, convfail);
++			  if (convfail || (!convfail && !iswspace (wc)))
++			    break;
++			}
++		      h = HASH (h, ' ');
++		    }
++
++		  /* WC is now the first non-space.  */
++		  if (convfail)
++		    mbc[0] = *p++;
++		  else
++		    {
++		      bool flag = 0;
++
++		      if (ignore_case)
++			{
++			  lo_wc = towlower (wc);
++			  if (lo_wc != wc)
++			    {
++			      flag = 1;
++
++			      p += mblength;
++			      memset (&state_wc, '\0', sizeof(mbstate_t));
++			      mblength = wcrtomb (mbc, lo_wc, &state_wc);
++
++			      assert (mblength != (size_t)-1 &&
++				  mblength != (size_t)-2);
++
++			      mblength = (mblength < 1) ? 1 : mblength;
++			    }
++			}
++
++		      if (!flag)
++			{
++			  for (i = 0; i < mblength; i++)
++			    mbc[i] = *p++;
++			}
++		    }
++
++		  for (i = 0; i < mblength; i++)
++		    h = HASH (h, mbc[i]);
++		}
++	      break;
++
++	    case IGNORE_TAB_EXPANSION:
++		{
++		  size_t column = 0;
++
++		  while (1)
++		    {
++		      if (*p == '\n')
++			{
++			  ++p;
++			  break;
++			}
++
++		      MBC2WC (p, suffix_begin, mblength, wc, state, convfail);
++
++		      if (convfail)
++			{
++			  h = HASH (h, *p++);
++			  ++column;
++			}
++		      else
++			{
++			  bool flag;
++
++			  switch (wc)
++			    {
++			    case L'\b':
++			      column -= 0 < column;
++			      h = HASH (h, '\b');
++			      ++p;
++			      break;
++
++			    case L'\t':
++				{
++				  int repetitions;
++
++				  repetitions = TAB_WIDTH - column % TAB_WIDTH;
++				  column += repetitions;
++				  do
++				    h = HASH (h, ' ');
++				  while (--repetitions != 0);
++				  ++p;
++				}
++			      break;
++
++			    case L'\r':
++			      column = 0;
++			      h = HASH (h, '\r');
++			      ++p;
++			      break;
++
++			    default:
++			      flag = 0;
++			      column += wcwidth (wc);
++			      if (ignore_case)
++				{
++				  lo_wc = towlower (wc);
++				  if (lo_wc != wc)
++				    {
++				      flag = 1;
++				      p += mblength;
++				      memset (&state_wc, '\0', sizeof(mbstate_t));
++				      mblength = wcrtomb (mbc, lo_wc, &state_wc);
++
++				      assert (mblength != (size_t)-1 &&
++					  mblength != (size_t)-2);
++
++				      mblength = (mblength < 1) ? 1 : mblength;
++				    }
++				}
++
++			      if (!flag)
++				{
++				  for (i = 0; i < mblength; i++)
++				    mbc[i] = *p++;
++				}
++
++			      for (i = 0; i < mblength; i++)
++				h = HASH (h, mbc[i]);
++			    }
++			}
++		    }
++		}
++	      break;
++
++	    default:
++	      while (1)
++		{
++		  if (*p == '\n')
++		    {
++		      ++p;
++		      break;
++		    }
++
++		  MBC2WC (p, suffix_begin, mblength, wc, state, convfail);
++
++		  if (convfail)
++		    mbc[0] = *p++;
++		  else
++		    {
++		      int flag = 0;
++
++		      if (ignore_case)
++			{
++			  lo_wc = towlower (wc);
++			  if (lo_wc != wc)
++			    {
++			      flag = 1;
++			      p += mblength;
++			      memset (&state_wc, '\0', sizeof(mbstate_t));
++			      mblength = wcrtomb (mbc, lo_wc, &state_wc);
++
++			      assert (mblength != (size_t)-1 &&
++				  mblength != (size_t)-2);
++
++			      mblength = (mblength < 1) ? 1 : mblength;
++			    }
++			}
++
++		      if (!flag)
++			{
++			  for (i = 0; i < mblength; i++)
++			    mbc[i] = *p++;
++			}
++		    }
++
++		  for (i = 0; i < mblength; i++)
++		    h = HASH (h, mbc[i]);
++		}
++	    }
++	}
++      else
++#endif
+       /* Hash this line until we find a newline.  */
+       if (ignore_case)
+ 	switch (ignore_white_space)
+--- diffutils-2.8.4/src/side.c.i18n	2002-06-11 02:06:32.000000000 -0400
++++ diffutils-2.8.4/src/side.c	2002-11-16 18:41:37.000000000 -0500
+@@ -73,11 +73,72 @@
+   register size_t out_position = 0;
+   register char const *text_pointer = line[0];
+   register char const *text_limit = line[1];
++#if defined HAVE_WCHAR_H && defined HAVE_WCTYPE_H
++  unsigned char mbc[MB_LEN_MAX];
++  wchar_t wc;
++  mbstate_t state, state_bak;
++  size_t mbc_pos, mblength;
++  int mbc_loading_flag = 0;
++  int wc_width;
++
++  memset (&state, '\0', sizeof (mbstate_t));
++#endif
+ 
+   while (text_pointer < text_limit)
+     {
+       register unsigned char c = *text_pointer++;
+ 
++#if defined HAVE_WCHAR_H && defined HAVE_WCTYPE_H
++      if (MB_CUR_MAX > 1 && mbc_loading_flag)
++	{
++	  mbc_loading_flag = 0;
++	  state_bak = state;
++	  mbc[mbc_pos++] = c;
++
++process_mbc:
++	  mblength = mbrtowc (&wc, mbc, mbc_pos, &state);
++
++	  switch (mblength)
++	    {
++	    case (size_t)-2:	/* Incomplete multibyte character. */
++	      mbc_loading_flag = 1;
++	      state = state_bak;
++	      break;
++
++	    case (size_t)-1:	/* Invalid as a multibyte character. */
++	      if (in_position++ < out_bound)
++		{
++		  out_position = in_position;
++		  putc (mbc[0], out);
++		}
++	      memmove (mbc, mbc + 1, --mbc_pos);
++	      if (mbc_pos > 0)
++		{
++		  mbc[mbc_pos] = '\0';
++		  goto process_mbc;
++		}
++	      break;
++
++	    default:
++	      wc_width = wcwidth (wc);
++	      if (wc_width < 1)	/* Unprintable multibyte character. */
++		{
++		  if (in_position <= out_bound)
++		    fprintf (out, "%lc", (wint_t)wc);
++		}
++	      else		/* Printable multibyte character. */
++		{
++		  in_position += wc_width;
++		  if (in_position <= out_bound)
++		    {
++		      out_position = in_position;
++		      fprintf (out, "%lc", (wint_t)wc);
++		    }
++		}
++	    }
++	  continue;
++	}
++#endif
+       switch (c)
+ 	{
+ 	case '\t':
+@@ -135,8 +196,39 @@
+ 	  break;
+ 
+ 	default:
+-	  if (! ISPRINT (c))
+-	    goto control_char;
++#if defined HAVE_WCHAR_H && defined HAVE_WCTYPE_H
++	  if (MB_CUR_MAX > 1)
++	    {
++	      memset (mbc, '\0', MB_LEN_MAX);
++	      mbc_pos = 0;
++	      mbc[mbc_pos++] = c;
++	      state_bak = state;
++
++	      mblength = mbrtowc (&wc, mbc, mbc_pos, &state);
++
++	      /* Only one byte was passed in, so mblength is 0, 1, (size_t)-1 or (size_t)-2 here. */
++	      switch (mblength)
++		{
++		case (size_t)-2:	/* Incomplete multibyte character. */
++		  state = state_bak;
++		  mbc_loading_flag = 1;
++		  continue;
++
++		case (size_t)-1:	/* Invalid as a multibyte character. */
++		  state = state_bak;
++		  break;
++
++		default:
++		  if (! iswprint (wc))
++		    goto control_char;
++		}
++	    }
++	  else
++#endif
++	    {
++	      if (! ISPRINT (c))
++		goto control_char;
++	    }
+ 	  /* falls through */
+ 	case ' ':
+ 	  if (in_position++ < out_bound)
+--- diffutils-2.8.4/src/util.c.i18n	2002-06-11 02:06:32.000000000 -0400
++++ diffutils-2.8.4/src/util.c	2002-11-16 18:41:37.000000000 -0500
+@@ -321,7 +321,7 @@
+    Return nonzero if the lines differ.  */
+ 
+ bool
+-lines_differ (char const *s1, char const *s2)
++lines_differ_singlebyte (char const *s1, char const *s2)
+ {
+   register unsigned char const *t1 = (unsigned char const *) s1;
+   register unsigned char const *t2 = (unsigned char const *) s2;
+@@ -450,6 +450,293 @@
+ 
+   return 1;
+ }
++
++#ifdef HANDLE_MULTIBYTE
++# define MBC2WC(T, END, MBLENGTH, WC, STATE, CONVFAIL)			\
++do									\
++{									\
++    mbstate_t bak = STATE;						\
++									\
++    CONVFAIL = 0;							\
++    MBLENGTH = mbrtowc (&WC, T, END - T, &STATE);			\
++									\
++    switch (MBLENGTH)							\
++      {									\
++      case (size_t)-2:							\
++      case (size_t)-1:							\
++	STATE = bak;							\
++	++CONVFAIL;							\
++	  /* Fall through. */						\
++      case 0:								\
++	MBLENGTH = 1;							\
++      }									\
++}									\
++while (0)
++
++bool
++lines_differ_multibyte (char const *s1, char const *s2)
++{
++  unsigned char const *end1, *end2;
++  unsigned char c1, c2;
++  wchar_t wc1, wc2, wc1_bak, wc2_bak;
++  size_t mblen1, mblen2;
++  mbstate_t state1, state2, state1_bak, state2_bak;
++  int convfail1, convfail2, convfail1_bak, convfail2_bak;
++  
++  unsigned char const *t1 = (unsigned char const *) s1;
++  unsigned char const *t2 = (unsigned char const *) s2;
++  unsigned char const *t1_bak, *t2_bak;
++  size_t column = 0;
++
++  if (ignore_white_space == IGNORE_NO_WHITE_SPACE  && !ignore_case)
++    {
++      while (*t1 != '\n')
++	if (*t1++ != * t2++)
++	  return 1;
++      return 0;
++    }
++
++  memset (&state1, '\0', sizeof (mbstate_t));
++  memset (&state2, '\0', sizeof (mbstate_t));
++
++  end1 = s1 + strlen (s1);
++  end2 = s2 + strlen (s2);
++
++  while (1)
++    {
++      c1 = *t1;
++      c2 = *t2;
++      MBC2WC (t1, end1, mblen1, wc1, state1, convfail1);
++      MBC2WC (t2, end2, mblen2, wc2, state2, convfail2);
++
++      /* Test for exact char equality first, since it's a common case.  */
++      if (convfail1 ^ convfail2)
++	break;
++      else if (convfail1 && convfail2 && c1 != c2)
++	break;
++      else if (!convfail1 && !convfail2 && wc1 != wc2)
++	{
++	  switch (ignore_white_space)
++	    {
++	    case IGNORE_ALL_SPACE:
++	      /* For -w, just skip past any white space.  */
++	      while (1)
++		{
++		  if (convfail1)
++		    break;
++		  else if (wc1 == L'\n' || !iswspace (wc1))
++		    break;
++
++		  t1 += mblen1;
++		  c1 = *t1;
++		  MBC2WC (t1, end1, mblen1, wc1, state1, convfail1);
++		}
++
++	      while (1)
++		{
++		  if (convfail2)
++		    break;
++		  else if (wc2 == L'\n' || !iswspace (wc2))
++		    break;
++
++		  t2 += mblen2;
++		  c2 = *t2;
++		  MBC2WC (t2, end2, mblen2, wc2, state2, convfail2);
++		}
++	      t1 += mblen1;
++	      t2 += mblen2;
++	      break;
++
++	    case IGNORE_SPACE_CHANGE:
++	      /* For -b, advance past any sequence of white space in
++		 line 1 and consider it just one space, or nothing at
++		 all if it is at the end of the line.  */
++	      if (wc1 != L'\n' && iswspace (wc1))
++		{
++		  size_t mblen_bak;
++		  mbstate_t state_bak;
++
++		  do
++		    {
++		      t1 += mblen1;
++		      mblen_bak = mblen1;
++		      state_bak = state1;
++		      MBC2WC (t1, end1, mblen1, wc1, state1, convfail1);
++		    }
++		  while (!convfail1 && (wc1 != L'\n' && iswspace (wc1)));
++
++		  state1 = state_bak;
++		  mblen1 = mblen_bak;
++		  t1 -= mblen1;
++		  convfail1 = 0;
++		  wc1 = L' ';
++		}
++
++	      /* Likewise for line 2.  */
++	      if (wc2 != L'\n' && iswspace (wc2))
++		{
++		  size_t mblen_bak;
++		  mbstate_t state_bak;
++
++		  do
++		    {
++		      t2 += mblen2;
++		      mblen_bak = mblen2;
++		      state_bak = state2;
++		      MBC2WC (t2, end2, mblen2, wc2, state2, convfail2);
++		    }
++		  while (!convfail2 && (wc2 != L'\n' && iswspace (wc2)));
++
++		  state2 = state_bak;
++		  mblen2 = mblen_bak;
++		  t2 -= mblen2;
++		  convfail2 = 0;
++		  wc2 = L' ';
++		}
++
++	      if (wc1 != wc2)
++		{
++		  if (wc2 == L' ' && wc1 != L'\n' &&
++		      t1 > (unsigned char const *)s1 &&
++		      !convfail1_bak && iswspace (wc1_bak))
++		    {
++		      t1 = t1_bak;
++		      wc1 = wc1_bak;
++		      state1 = state1_bak;
++		      convfail1 = convfail1_bak;
++		      continue;
++		    }
++		  if (wc1 == L' ' && wc2 != L'\n'
++		      && t2 > (unsigned char const *)s2
++		      && !convfail2_bak && iswspace (wc2_bak))
++		    {
++		      t2 = t2_bak;
++		      wc2 = wc2_bak;
++		      state2 = state2_bak;
++		      convfail2 = convfail2_bak;
++		      continue;
++		    }
++		}
++
++	      t1_bak = t1;		  t2_bak = t2;
++	      wc1_bak = wc1;		  wc2_bak = wc2;
++	      state1_bak = state1;	  state2_bak = state2;
++	      convfail1_bak = convfail1;  convfail2_bak = convfail2;
++
++	      if (wc1 == L'\n')
++		wc1 = L' ';
++	      else
++		t1 += mblen1;
++
++	      if (wc2 == L'\n')
++		wc2 = L' ';
++	      else
++		t2 += mblen2;
++
++	      break;
++
++	    case IGNORE_TAB_EXPANSION:
++	      if ((wc1 == L' ' && wc2 == L'\t')
++		  || (wc1 == L'\t' && wc2 == L' '))
++		{
++		  size_t column2 = column;
++
++		  while (1)
++		    {
++		      if (convfail1)
++			{
++			  ++t1;
++			  break;
++			}
++		      else if (wc1 == L' ')
++			column++;
++		      else if (wc1 == L'\t')
++			column += TAB_WIDTH - column % TAB_WIDTH;
++		      else
++			{
++			  t1 += mblen1;
++			  break;
++			}
++
++		      t1 += mblen1;
++		      c1 = *t1;
++		      MBC2WC (t1, end1, mblen1, wc1, state1, convfail1);
++		    }
++
++		  while (1)
++		    {
++		      if (convfail2)
++			{
++			  ++t2;
++			  break;
++			}
++		      else if (wc2 == L' ')
++			column2++;
++		      else if (wc2 == L'\t')
++			column2 += TAB_WIDTH - column2 % TAB_WIDTH;
++		      else
++			{
++			  t2 += mblen2;
++			  break;
++			}
++
++		      t2 += mblen2;
++		      c2 = *t2;
++		      MBC2WC (t2, end2, mblen2, wc2, state2, convfail2);
++		    }
++
++		  if (column != column2)
++		    return 1;
++		}
++	      else
++		{
++		  t1 += mblen1;
++		  t2 += mblen2;
++		}
++	      break;
++
++	    case IGNORE_NO_WHITE_SPACE:
++	      t1 += mblen1;
++	      t2 += mblen2;
++	      break;
++	    }
++
++	  /* Lowercase all letters if -i is specified.  */
++	  if (ignore_case)
++	    {
++	      if (!convfail1)
++		wc1 = towlower (wc1);
++	      if (!convfail2)
++		wc2 = towlower (wc2);
++	    }
++
++	  if (convfail1 ^ convfail2)
++	    break;
++	  else if (convfail1 && convfail2 && c1 != c2)
++	    break;
++	  else if (!convfail1 && !convfail2 && wc1 != wc2)
++	    break;
++	}
++      else
++	{
++	  t1_bak = t1;			t2_bak = t2;
++	  wc1_bak = wc1;		wc2_bak = wc2;
++	  state1_bak = state1;		state2_bak = state2;
++	  convfail1_bak = convfail1;	convfail2_bak = convfail2;
++
++	  t1 += mblen1;			t2 += mblen2;
++	}
++      
++      if (!convfail1 && wc1 == L'\n')
++	return 0;
++
++      column += convfail1 ? 1 :
++	(wc1 == L'\t') ? TAB_WIDTH - column % TAB_WIDTH : wcwidth (wc1);
++    }
++
++  return 1;
++}
++#endif
+ 

+ /* Find the consecutive changes at the start of the script START.
+    Return the last link before the first gap.  */

Added: trunk/grep/grep-2.5.1a-redhat_fixes-2.patch
===================================================================
--- trunk/grep/grep-2.5.1a-redhat_fixes-2.patch	                        (rev 0)
+++ trunk/grep/grep-2.5.1a-redhat_fixes-2.patch	2006-01-25 21:40:50 UTC (rev 1372)
@@ -0,0 +1,2109 @@
+Submitted by: Alexander E. Patrakov
+Date: 2005-08-13
+Initial Package Version: 2.5.1a
+Upstream Status: Partially accepted, partially rejected, but required for LSB >= 2.0 certification
+Origin: RedHat
+Description: Various fixes from RedHat. Individual patches:
+
+   grep-2.5.1-fgrep.patch
+   grep-2.5.1-bracket.patch
+   grep-2.5-i18n.patch
+   grep-2.5.1-oi.patch
+   grep-2.5.1-manpage.patch
+   grep-2.5.1-color.patch
+   grep-2.5.1-icolor.patch
+   grep-2.5.1-egf-speedup.patch
+   grep-2.5.1-dfa-optional.patch
+   grep-2.5.1-tests.patch
+   grep-2.5.1-w.patch
+
+Testcases:
+
+ -fgrep: ???, but required for other patches
+ -bracket: echo "[" | LANG=en_US.UTF-8 grep "[[:space:]]"
+ -i18n: many fixes for multibyte locale support, required for LSB.
+ -oi: echo xxYYzz | LANG=C grep -i -o yy
+ -manpage: typo
+ -color: restore the background color correctly
+ -icolor: ??? echo 'spam foo SPAM FOO' | grep -i --color spam
+     (but that's also fixed by -oi. Is this patch just a cleanup?)
+ -egf-speedup: without this, grep is as slow as a snail in UTF-8 locales.
+ -dfa-optional: disables dfa in multibyte locales by default.
+ -w: (echo 'foo';echo 'fo') > /tmp/testfile && grep -F -w fo /tmp/testfile
+
+diff -urN grep-2.5.1a.orig/doc/grep.1 grep-2.5.1a/doc/grep.1
+--- grep-2.5.1a.orig/doc/grep.1	2004-11-12 16:26:37.000000000 +0500
++++ grep-2.5.1a/doc/grep.1	2005-10-23 09:49:43.000000000 +0600
+@@ -191,6 +191,7 @@
+ .I PATTERN
+ as a list of fixed strings, separated by newlines,
+ any of which is to be matched.
++.TP
+ .BR \-P ", " \-\^\-perl-regexp
+ Interpret
+ .I PATTERN
+@@ -302,7 +303,7 @@
+ This is especially useful for tools like zgrep, e.g.
+ .B "gzip -cd foo.gz |grep --label=foo something"
+ .TP
+-.BR \-\^\-line-buffering
++.BR \-\^\-line-buffered
+ Use line buffering, it can be a performance penality.
+ .TP
+ .BR \-q ", " \-\^\-quiet ", " \-\^\-silent
+diff -urN grep-2.5.1a.orig/lib/posix/regex.h grep-2.5.1a/lib/posix/regex.h
+--- grep-2.5.1a.orig/lib/posix/regex.h	2001-04-02 23:56:50.000000000 +0600
++++ grep-2.5.1a/lib/posix/regex.h	2005-10-23 09:49:31.000000000 +0600
+@@ -109,6 +109,10 @@
+    If not set, \{, \}, {, and } are literals.  */
+ #define RE_INTERVALS (RE_HAT_LISTS_NOT_NEWLINE << 1)
+ 
++/* If this bit is set, then ignore case when matching.
++   If not set, then case is significant.  */
++#define RE_ICASE (RE_INVALID_INTERVAL_ORD << 1)
++
+ /* If this bit is set, +, ? and | aren't recognized as operators.
+    If not set, they are.  */
+ #define RE_LIMITED_OPS (RE_INTERVALS << 1)
+diff -urN grep-2.5.1a.orig/src/dfa.c grep-2.5.1a/src/dfa.c
+--- grep-2.5.1a.orig/src/dfa.c	2001-09-26 22:57:55.000000000 +0600
++++ grep-2.5.1a/src/dfa.c	2005-10-23 09:49:17.000000000 +0600
+@@ -414,7 +414,7 @@
+ 
+ /* This function fetch a wide character, and update cur_mb_len,
+    used only if the current locale is a multibyte environment.  */
+-static wchar_t
++static wint_t
+ fetch_wc (char const *eoferr)
+ {
+   wchar_t wc;
+@@ -423,7 +423,7 @@
+       if (eoferr != 0)
+ 	dfaerror (eoferr);
+       else
+-	return -1;
++	return WEOF;
+     }
+ 
+   cur_mb_len = mbrtowc(&wc, lexptr, lexleft, &mbs);
+@@ -459,7 +459,7 @@
+ static void
+ parse_bracket_exp_mb ()
+ {
+-  wchar_t wc, wc1, wc2;
++  wint_t wc, wc1, wc2;
+ 
+   /* Work area to build a mb_char_classes.  */
+   struct mb_char_classes *work_mbc;
+@@ -496,7 +496,7 @@
+     work_mbc->invert = 0;
+   do
+     {
+-      wc1 = -1; /* mark wc1 is not initialized".  */
++      wc1 = WEOF; /* mark wc1 is not initialized".  */
+ 
+       /* Note that if we're looking at some other [:...:] construct,
+ 	 we just treat it as a bunch of ordinary characters.  We can do
+@@ -586,7 +586,7 @@
+ 		      work_mbc->coll_elems[work_mbc->ncoll_elems++] = elem;
+ 		    }
+  		}
+-	      wc = -1;
++	      wc1 = wc = WEOF;
+ 	    }
+ 	  else
+ 	    /* We treat '[' as a normal character here.  */
+@@ -600,7 +600,7 @@
+ 	    wc = fetch_wc(("Unbalanced ["));
+ 	}
+ 
+-      if (wc1 == -1)
++      if (wc1 == WEOF)
+ 	wc1 = fetch_wc(_("Unbalanced ["));
+ 
+       if (wc1 == L'-')
+@@ -630,17 +630,17 @@
+ 	    }
+ 	  REALLOC_IF_NECESSARY(work_mbc->range_sts, wchar_t,
+ 			       range_sts_al, work_mbc->nranges + 1);
+-	  work_mbc->range_sts[work_mbc->nranges] = wc;
++	  work_mbc->range_sts[work_mbc->nranges] = (wchar_t)wc;
+ 	  REALLOC_IF_NECESSARY(work_mbc->range_ends, wchar_t,
+ 			       range_ends_al, work_mbc->nranges + 1);
+-	  work_mbc->range_ends[work_mbc->nranges++] = wc2;
++	  work_mbc->range_ends[work_mbc->nranges++] = (wchar_t)wc2;
+ 	}
+-      else if (wc != -1)
++      else if (wc != WEOF)
+ 	/* build normal characters.  */
+ 	{
+ 	  REALLOC_IF_NECESSARY(work_mbc->chars, wchar_t, chars_al,
+ 			       work_mbc->nchars + 1);
+-	  work_mbc->chars[work_mbc->nchars++] = wc;
++	  work_mbc->chars[work_mbc->nchars++] = (wchar_t)wc;
+ 	}
+     }
+   while ((wc = wc1) != L']');
+@@ -2552,6 +2552,8 @@
+     }
+ 
+   /* match with a character?  */
++  if (case_fold)
++    wc = towlower (wc);
+   for (i = 0; i<work_mbc->nchars; i++)
+     {
+       if (wc == work_mbc->chars[i])
+diff -urN grep-2.5.1a.orig/src/grep.c grep-2.5.1a/src/grep.c
+--- grep-2.5.1a.orig/src/grep.c	2004-11-12 16:25:35.000000000 +0500
++++ grep-2.5.1a/src/grep.c	2005-10-23 09:50:06.000000000 +0600
+@@ -30,6 +30,12 @@
+ # include <sys/time.h>
+ # include <sys/resource.h>
+ #endif
++#if defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H && defined HAVE_MBRTOWC
++/* We can handle multibyte string.  */
++# define MBS_SUPPORT
++# include <wchar.h>
++# include <wctype.h>
++#endif
+ #include <stdio.h>
+ #include "system.h"
+ #include "getopt.h"
+@@ -558,33 +564,6 @@
+     {
+       size_t match_size;
+       size_t match_offset;
+-      if(match_icase)
+-        {
+-	  /* Yuck, this is tricky */
+-          char *buf = (char*) xmalloc (lim - beg);
+-	  char *ibeg = buf;
+-	  char *ilim = ibeg + (lim - beg);
+-	  int i;
+-	  for (i = 0; i < lim - beg; i++)
+-	    ibeg[i] = tolower (beg[i]);
+-	  while ((match_offset = (*execute) (ibeg, ilim-ibeg, &match_size, 1))
+-		 != (size_t) -1)
+-	    {
+-	      char const *b = beg + match_offset;
+-	      if (b == lim)
+-		break;
+-	      fwrite (beg, sizeof (char), match_offset, stdout);
+-	      printf ("\33[%sm", grep_color);
+-	      fwrite (b, sizeof (char), match_size, stdout);
+-	      fputs ("\33[00m", stdout);
+-	      beg = b + match_size;
+-	      ibeg = ibeg + match_offset + match_size;
+-	    }
+-	  fwrite (beg, 1, lim - beg, stdout);
+-	  free (buf);
+-	  lastout = lim;
+-	  return;
+-	}
+       while (lim-beg && (match_offset = (*execute) (beg, lim - beg, &match_size, 1))
+ 	     != (size_t) -1)
+ 	{
+@@ -601,6 +580,7 @@
+ 	  fputs ("\33[00m", stdout);
+ 	  beg = b + match_size;
+ 	}
++      fputs ("\33[K", stdout);
+     }
+   fwrite (beg, 1, lim - beg, stdout);
+   if (ferror (stdout))
+@@ -1697,6 +1677,37 @@
+   if (!install_matcher (matcher) && !install_matcher ("default"))
+     abort ();
+ 
++#ifdef MBS_SUPPORT
++  if (MB_CUR_MAX != 1 && match_icase)
++    {
++      wchar_t wc;
++      mbstate_t cur_state, prev_state;
++      int i, len = strlen(keys);
++
++      memset(&cur_state, 0, sizeof(mbstate_t));
++      for (i = 0; i <= len ;)
++	{
++	  size_t mbclen;
++	  mbclen = mbrtowc(&wc, keys + i, len - i, &cur_state);
++	  if (mbclen == (size_t) -1 || mbclen == (size_t) -2 || mbclen == 0)
++	    {
++	      /* An invalid sequence, or a truncated multibyte character.
++		 We treat it as a singlebyte character.  */
++	      mbclen = 1;
++	    }
++	  else
++	    {
++	      if (iswupper((wint_t)wc))
++		{
++		  wc = towlower((wint_t)wc);
++		  wcrtomb(keys + i, wc, &cur_state);
++		}
++	    }
++	  i += mbclen;
++	}
++    }
++#endif /* MBS_SUPPORT */
++
+   (*compile)(keys, keycc);
+ 
+   if ((argc - optind > 1 && !no_filenames) || with_filenames)
+diff -urN grep-2.5.1a.orig/src/search.c grep-2.5.1a/src/search.c
+--- grep-2.5.1a.orig/src/search.c	2001-04-19 09:42:14.000000000 +0600
++++ grep-2.5.1a/src/search.c	2005-10-23 09:51:25.000000000 +0600
+@@ -18,9 +18,13 @@
+ 
+ /* Written August 1992 by Mike Haertel. */
+ 
++#ifndef _GNU_SOURCE
++# define _GNU_SOURCE 1
++#endif
+ #ifdef HAVE_CONFIG_H
+ # include <config.h>
+ #endif
++#include <assert.h>
+ #include <sys/types.h>
+ #if defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H && defined HAVE_MBRTOWC
+ /* We can handle multibyte string.  */
+@@ -31,7 +35,7 @@
+ 
+ #include "system.h"
+ #include "grep.h"
+-#include "regex.h"
++#include <regex.h>
+ #include "dfa.h"
+ #include "kwset.h"
+ #include "error.h"
+@@ -39,6 +43,9 @@
+ #ifdef HAVE_LIBPCRE
+ # include <pcre.h>
+ #endif
++#ifdef HAVE_LANGINFO_CODESET
++# include <langinfo.h>
++#endif
+ 
+ #define NCHAR (UCHAR_MAX + 1)
+ 
+@@ -70,9 +77,10 @@
+    call the regexp matcher at all. */
+ static int kwset_exact_matches;
+ 
+-#if defined(MBS_SUPPORT)
+-static char* check_multibyte_string PARAMS ((char const *buf, size_t size));
+-#endif
++/* UTF-8 encoding allows some optimizations that we can't otherwise
++   assume in a multibyte encoding. */
++static int using_utf8;
++
+ static void kwsinit PARAMS ((void));
+ static void kwsmusts PARAMS ((void));
+ static void Gcompile PARAMS ((char const *, size_t));
+@@ -84,6 +92,15 @@
+ static size_t Pexecute PARAMS ((char const *, size_t, size_t *, int));
+ 
+ void
++check_utf8 (void)
++{
++#ifdef HAVE_LANGINFO_CODESET
++  if (strcmp (nl_langinfo (CODESET), "UTF-8") == 0)
++    using_utf8 = 1;
++#endif
++}
++
++void
+ dfaerror (char const *mesg)
+ {
+   error (2, 0, mesg);
+@@ -141,38 +158,6 @@
+     }
+ }
+ 
+-#ifdef MBS_SUPPORT
+-/* This function allocate the array which correspond to "buf".
+-   Then this check multibyte string and mark on the positions which
+-   are not singlebyte character nor the first byte of a multibyte
+-   character.  Caller must free the array.  */
+-static char*
+-check_multibyte_string(char const *buf, size_t size)
+-{
+-  char *mb_properties = malloc(size);
+-  mbstate_t cur_state;
+-  int i;
+-  memset(&cur_state, 0, sizeof(mbstate_t));
+-  memset(mb_properties, 0, sizeof(char)*size);
+-  for (i = 0; i < size ;)
+-    {
+-      size_t mbclen;
+-      mbclen = mbrlen(buf + i, size - i, &cur_state);
+-
+-      if (mbclen == (size_t) -1 || mbclen == (size_t) -2 || mbclen == 0)
+-	{
+-	  /* An invalid sequence, or a truncated multibyte character.
+-	     We treat it as a singlebyte character.  */
+-	  mbclen = 1;
+-	}
+-      mb_properties[i] = mbclen;
+-      i += mbclen;
+-    }
+-
+-  return mb_properties;
+-}
+-#endif
+-
+ static void
+ Gcompile (char const *pattern, size_t size)
+ {
+@@ -181,7 +166,8 @@
+   size_t total = size;
+   char const *motif = pattern;
+ 
+-  re_set_syntax (RE_SYNTAX_GREP | RE_HAT_LISTS_NOT_NEWLINE);
++  check_utf8 ();
++  re_set_syntax (RE_SYNTAX_GREP | RE_HAT_LISTS_NOT_NEWLINE | (match_icase ? RE_ICASE : 0));
+   dfasyntax (RE_SYNTAX_GREP | RE_HAT_LISTS_NOT_NEWLINE, match_icase, eolbyte);
+ 
+   /* For GNU regex compiler we have to pass the patterns separately to detect
+@@ -233,7 +219,7 @@
+       static char const line_end[] = "\\)$";
+       static char const word_beg[] = "\\(^\\|[^[:alnum:]_]\\)\\(";
+       static char const word_end[] = "\\)\\([^[:alnum:]_]\\|$\\)";
+-      char *n = malloc (sizeof word_beg - 1 + size + sizeof word_end);
++      char *n = xmalloc (sizeof word_beg - 1 + size + sizeof word_end);
+       size_t i;
+       strcpy (n, match_lines ? line_beg : word_beg);
+       i = strlen (n);
+@@ -257,14 +243,15 @@
+   size_t total = size;
+   char const *motif = pattern;
+ 
++  check_utf8 ();
+   if (strcmp (matcher, "awk") == 0)
+     {
+-      re_set_syntax (RE_SYNTAX_AWK);
++      re_set_syntax (RE_SYNTAX_AWK | (match_icase ? RE_ICASE : 0));
+       dfasyntax (RE_SYNTAX_AWK, match_icase, eolbyte);
+     }
+   else
+     {
+-      re_set_syntax (RE_SYNTAX_POSIX_EGREP);
++      re_set_syntax (RE_SYNTAX_POSIX_EGREP | (match_icase ? RE_ICASE : 0));
+       dfasyntax (RE_SYNTAX_POSIX_EGREP, match_icase, eolbyte);
+     }
+ 
+@@ -316,7 +303,7 @@
+       static char const line_end[] = ")$";
+       static char const word_beg[] = "(^|[^[:alnum:]_])(";
+       static char const word_end[] = ")([^[:alnum:]_]|$)";
+-      char *n = malloc (sizeof word_beg - 1 + size + sizeof word_end);
++      char *n = xmalloc (sizeof word_beg - 1 + size + sizeof word_end);
+       size_t i;
+       strcpy (n, match_lines ? line_beg : word_beg);
+       i = strlen(n);
+@@ -339,15 +326,35 @@
+   char eol = eolbyte;
+   int backref, start, len;
+   struct kwsmatch kwsm;
+-  size_t i;
++  size_t i, ret_val;
++  static int use_dfa;
++  static int use_dfa_checked = 0;
+ #ifdef MBS_SUPPORT
+-  char *mb_properties = NULL;
++  const char *last_char = NULL;
++  int mb_cur_max = MB_CUR_MAX;
++  mbstate_t mbs;
++  memset (&mbs, '\0', sizeof (mbstate_t));
+ #endif /* MBS_SUPPORT */
+ 
++  if (!use_dfa_checked)
++    {
++      char *grep_use_dfa = getenv ("GREP_USE_DFA");
++      if (!grep_use_dfa)
++	{
+ #ifdef MBS_SUPPORT
+-  if (MB_CUR_MAX > 1 && kwset)
+-    mb_properties = check_multibyte_string(buf, size);
++	  /* Turn off DFA when processing multibyte input. */
++	  use_dfa = (MB_CUR_MAX == 1);
++#else
++	  use_dfa = 1;
+ #endif /* MBS_SUPPORT */
++	}
++      else
++	{
++	  use_dfa = atoi (grep_use_dfa);
++	}
++
++      use_dfa_checked = 1;
++    }
+ 
+   buflim = buf + size;
+ 
+@@ -358,47 +365,124 @@
+ 	  if (kwset)
+ 	    {
+ 	      /* Find a possible match using the KWset matcher. */
+-	      size_t offset = kwsexec (kwset, beg, buflim - beg, &kwsm);
++#ifdef MBS_SUPPORT
++	      size_t bytes_left = 0;
++#endif /* MBS_SUPPORT */
++	      size_t offset;
++#ifdef MBS_SUPPORT
++	      /* kwsexec doesn't work with match_icase and multibyte input. */
++	      if (match_icase && mb_cur_max > 1)
++		/* Avoid kwset */
++		offset = 0;
++	      else
++#endif /* MBS_SUPPORT */
++	      offset = kwsexec (kwset, beg, buflim - beg, &kwsm);
+ 	      if (offset == (size_t) -1)
+-		{
++	        goto failure;
+ #ifdef MBS_SUPPORT
+-		  if (MB_CUR_MAX > 1)
+-		    free(mb_properties);
+-#endif
+-		  return (size_t)-1;
++	      if (mb_cur_max > 1 && !using_utf8)
++		{
++		  bytes_left = offset;
++		  while (bytes_left)
++		    {
++		      size_t mlen = mbrlen (beg, bytes_left, &mbs);
++
++		      last_char = beg;
++		      if (mlen == (size_t) -1 || mlen == 0)
++			{
++			  /* Invalid sequence or NUL: treat as single-byte. */
++			  memset (&mbs, '\0', sizeof (mbstate_t));
++			  beg++;
++			  bytes_left--;
++			  continue;
++			}
++
++		      if (mlen == (size_t) -2)
++			/* Offset points inside multibyte character:
++			 * no good. */
++			break;
++
++		      beg += mlen;
++		      bytes_left -= mlen;
++		    }
+ 		}
++	      else
++#endif /* MBS_SUPPORT */
+ 	      beg += offset;
+ 	      /* Narrow down to the line containing the candidate, and
+ 		 run it through DFA. */
+ 	      end = memchr(beg, eol, buflim - beg);
+ 	      end++;
+ #ifdef MBS_SUPPORT
+-	      if (MB_CUR_MAX > 1 && mb_properties[beg - buf] == 0)
++	      if (mb_cur_max > 1 && bytes_left)
+ 		continue;
+-#endif
++#endif /* MBS_SUPPORT */
+ 	      while (beg > buf && beg[-1] != eol)
+ 		--beg;
+-	      if (kwsm.index < kwset_exact_matches)
+-		goto success;
+-	      if (dfaexec (&dfa, beg, end - beg, &backref) == (size_t) -1)
++	      if (
++#ifdef MBS_SUPPORT
++		  !(match_icase && mb_cur_max > 1) &&
++#endif /* MBS_SUPPORT */
++		  (kwsm.index < kwset_exact_matches))
++		goto success_in_beg_and_end;
++	      if (use_dfa &&
++		  dfaexec (&dfa, beg, end - beg, &backref) == (size_t) -1)
+ 		continue;
+ 	    }
+ 	  else
+ 	    {
+ 	      /* No good fixed strings; start with DFA. */
+-	      size_t offset = dfaexec (&dfa, beg, buflim - beg, &backref);
++#ifdef MBS_SUPPORT
++	      size_t bytes_left = 0;
++#endif /* MBS_SUPPORT */
++	      size_t offset = 0;
++	      if (use_dfa)
++		offset = dfaexec (&dfa, beg, buflim - beg, &backref);
+ 	      if (offset == (size_t) -1)
+ 		break;
+ 	      /* Narrow down to the line we've found. */
++#ifdef MBS_SUPPORT
++	      if (mb_cur_max > 1 && !using_utf8)
++		{
++		  bytes_left = offset;
++		  while (bytes_left)
++		    {
++		      size_t mlen = mbrlen (beg, bytes_left, &mbs);
++
++		      last_char = beg;
++		      if (mlen == (size_t) -1 || mlen == 0)
++			{
++			  /* Invalid sequence or NUL: treat as single-byte. */
++			  memset (&mbs, '\0', sizeof (mbstate_t));
++			  beg++;
++			  bytes_left--;
++			  continue;
++			}
++
++		      if (mlen == (size_t) -2)
++			/* Offset points inside multibyte character:
++			 * no good. */
++			break;
++
++		      beg += mlen;
++		      bytes_left -= mlen;
++		    }
++		}
++	      else
++#endif /* MBS_SUPPORT */
+ 	      beg += offset;
+ 	      end = memchr (beg, eol, buflim - beg);
+ 	      end++;
++#ifdef MBS_SUPPORT
++	      if (mb_cur_max > 1 && bytes_left)
++		continue;
++#endif /* MBS_SUPPORT */
+ 	      while (beg > buf && beg[-1] != eol)
+ 		--beg;
+ 	    }
+ 	  /* Successful, no backreferences encountered! */
+-	  if (!backref)
+-	    goto success;
++	  if (use_dfa && !backref)
++	    goto success_in_beg_and_end;
+ 	}
+       else
+ 	end = beg + size;
+@@ -413,14 +497,11 @@
+ 				       end - beg - 1, &(patterns[i].regs))))
+ 	    {
+ 	      len = patterns[i].regs.end[0] - start;
+-	      if (exact)
+-		{
+-		  *match_size = len;
+-		  return start;
+-		}
++	      if (exact && !match_words)
++	        goto success_in_start_and_len;
+ 	      if ((!match_lines && !match_words)
+ 		  || (match_lines && len == end - beg - 1))
+-		goto success;
++		goto success_in_beg_and_end;
+ 	      /* If -w, check if the match aligns with word boundaries.
+ 		 We do this iteratively because:
+ 		 (a) the line may contain more than one occurence of the
+@@ -431,10 +512,84 @@
+ 	      if (match_words)
+ 		while (start >= 0)
+ 		  {
+-		    if ((start == 0 || !WCHAR ((unsigned char) beg[start - 1]))
+-			&& (len == end - beg - 1
+-			    || !WCHAR ((unsigned char) beg[start + len])))
+-		      goto success;
++		    int lword_match = 0;
++		    if (start == 0)
++		      lword_match = 1;
++		    else
++		      {
++			assert (start > 0);
++#ifdef MBS_SUPPORT
++			if (mb_cur_max > 1)
++			  {
++			    const char *s;
++			    int mr;
++			    wchar_t pwc;
++
++			    if (using_utf8)
++			      {
++				s = beg + start - 1;
++				while (s > buf
++				       && (unsigned char) *s >= 0x80
++				       && (unsigned char) *s <= 0xbf)
++				  --s;
++			      }
++			    else
++			      s = last_char;
++			    mr = mbtowc (&pwc, s, beg + start - s);
++			    if (mr <= 0)
++			      {
++				memset (&mbs, '\0', sizeof (mbstate_t));
++				lword_match = 1;
++			      }
++			    else if (!(iswalnum (pwc) || pwc == L'_')
++				     && mr == (int) (beg + start - s))
++			      lword_match = 1;
++			  }
++			else
++#endif /* MBS_SUPPORT */
++			if (!WCHAR ((unsigned char) beg[start - 1]))
++			  lword_match = 1;
++		      }
++
++		    if (lword_match)
++		      {
++			int rword_match = 0;
++			if (start + len == end - beg - 1)
++			  rword_match = 1;
++			else
++			  {
++#ifdef MBS_SUPPORT
++			    if (mb_cur_max > 1)
++			      {
++				wchar_t nwc;
++				int mr;
++
++				mr = mbtowc (&nwc, beg + start + len,
++					     end - beg - start - len - 1);
++				if (mr <= 0)
++				  {
++				    memset (&mbs, '\0', sizeof (mbstate_t));
++				    rword_match = 1;
++				  }
++				else if (!iswalnum (nwc) && nwc != L'_')
++				  rword_match = 1;
++			      }
++			    else
++#endif /* MBS_SUPPORT */
++			    if (!WCHAR ((unsigned char) beg[start + len]))
++			      rword_match = 1;
++			  }
++
++			if (rword_match)
++			  {
++			    if (!exact)
++			      /* Returns the whole line. */
++			      goto success_in_beg_and_end;
++			    else
++			      /* Returns just this word match. */
++			      goto success_in_start_and_len;
++			  }
++		      }
+ 		    if (len > 0)
+ 		      {
+ 			/* Try a shorter length anchored at the same place. */
+@@ -461,26 +616,154 @@
+ 	    }
+ 	} /* for Regex patterns.  */
+     } /* for (beg = end ..) */
+-#ifdef MBS_SUPPORT
+-  if (MB_CUR_MAX > 1 && mb_properties)
+-    free (mb_properties);
+-#endif /* MBS_SUPPORT */
++
++ failure:
+   return (size_t) -1;
+ 
+- success:
+-#ifdef MBS_SUPPORT
+-  if (MB_CUR_MAX > 1 && mb_properties)
+-    free (mb_properties);
+-#endif /* MBS_SUPPORT */
+-  *match_size = end - beg;
+-  return beg - buf;
++ success_in_beg_and_end:
++  len = end - beg;
++  start = beg - buf;
++  /* FALLTHROUGH */
++
++ success_in_start_and_len:
++  *match_size = len;
++  return start;
+ }
+ 
++#ifdef MBS_SUPPORT
++static int f_i_multibyte; /* whether we're using the new -Fi MB method */
++static struct
++{
++  wchar_t **patterns;
++  size_t count, maxlen;
++  unsigned char *match;
++} Fimb;
++#endif
++
+ static void
+ Fcompile (char const *pattern, size_t size)
+ {
++  int mb_cur_max = MB_CUR_MAX;
+   char const *beg, *lim, *err;
+ 
++  check_utf8 ();
++#ifdef MBS_SUPPORT
++  /* Support -F -i for UTF-8 input. */
++  if (match_icase && mb_cur_max > 1)
++    {
++      mbstate_t mbs;
++      wchar_t *wcpattern = xmalloc ((size + 1) * sizeof (wchar_t));
++      const char *patternend = pattern;
++      size_t wcsize;
++      kwset_t fimb_kwset = NULL;
++      char *starts = NULL;
++      wchar_t *wcbeg, *wclim;
++      size_t allocated = 0;
++
++      memset (&mbs, '\0', sizeof (mbs));
++# ifdef __GNU_LIBRARY__
++      wcsize = mbsnrtowcs (wcpattern, &patternend, size, size, &mbs);
++      if (patternend != pattern + size)
++	wcsize = (size_t) -1;
++# else
++      {
++	char *patterncopy = xmalloc (size + 1);
++
++	memcpy (patterncopy, pattern, size);
++	patterncopy[size] = '\0';
++	patternend = patterncopy;
++	wcsize = mbsrtowcs (wcpattern, &patternend, size, &mbs);
++	if (patternend != patterncopy + size)
++	  wcsize = (size_t) -1;
++	free (patterncopy);
++      }
++# endif
++      if (wcsize + 2 <= 2)
++	{
++fimb_fail:
++	  free (wcpattern);
++	  free (starts);
++	  if (fimb_kwset)
++	    kwsfree (fimb_kwset);
++	  free (Fimb.patterns);
++	  Fimb.patterns = NULL;
++	}
++      else
++	{
++	  if (!(fimb_kwset = kwsalloc (NULL)))
++	    error (2, 0, _("memory exhausted"));
++
++	  starts = xmalloc (mb_cur_max * 3);
++	  wcbeg = wcpattern;
++	  do
++	    {
++	      int i;
++	      size_t wclen;
++
++	      if (Fimb.count >= allocated)
++		{
++		  if (allocated == 0)
++		    allocated = 128;
++		  else
++		    allocated *= 2;
++		  Fimb.patterns = xrealloc (Fimb.patterns,
++					    sizeof (wchar_t *) * allocated);
++		}
++	      Fimb.patterns[Fimb.count++] = wcbeg;
++	      for (wclim = wcbeg;
++		   wclim < wcpattern + wcsize && *wclim != L'\n'; ++wclim)
++		*wclim = towlower (*wclim);
++	      *wclim = L'\0';
++	      wclen = wclim - wcbeg;
++	      if (wclen > Fimb.maxlen)
++		Fimb.maxlen = wclen;
++	      if (wclen > 3)
++		wclen = 3;
++	      if (wclen == 0)
++		{
++		  if ((err = kwsincr (fimb_kwset, "", 0)) != 0)
++		    error (2, 0, err);
++		}
++	      else
++		for (i = 0; i < (1 << wclen); i++)
++		  {
++		    char *p = starts;
++		    int j, k;
++
++		    for (j = 0; j < wclen; ++j)
++		      {
++			wchar_t wc = wcbeg[j];
++			if (i & (1 << j))
++			  {
++			    wc = towupper (wc);
++			    if (wc == wcbeg[j])
++			      continue;
++			  }
++			k = wctomb (p, wc);
++			if (k <= 0)
++			  goto fimb_fail;
++			p += k;
++		      }
++		    if ((err = kwsincr (fimb_kwset, starts, p - starts)) != 0)
++		      error (2, 0, err);
++		  }
++	      if (wclim < wcpattern + wcsize)
++		++wclim;
++	      wcbeg = wclim;
++	    }
++	  while (wcbeg < wcpattern + wcsize);
++	  f_i_multibyte = 1;
++	  kwset = fimb_kwset;
++	  free (starts);
++	  Fimb.match = xmalloc (Fimb.count);
++	  if ((err = kwsprep (kwset)) != 0)
++	    error (2, 0, err);
++	  return;
++	}
++    }
++#endif /* MBS_SUPPORT */
++
++
+   kwsinit ();
+   beg = pattern;
+   do
+@@ -499,6 +782,76 @@
+     error (2, 0, err);
+ }
+ 
++#ifdef MBS_SUPPORT
++static int
++Fimbexec (const char *buf, size_t size, size_t *plen, int exact)
++{
++  size_t len, letter, i;
++  int ret = -1;
++  mbstate_t mbs;
++  wchar_t wc;
++  int patterns_left;
++
++  assert (match_icase && f_i_multibyte == 1);
++  assert (MB_CUR_MAX > 1);
++
++  memset (&mbs, '\0', sizeof (mbs));
++  memset (Fimb.match, '\1', Fimb.count);
++  letter = len = 0;
++  patterns_left = 1;
++  while (patterns_left && len <= size)
++    {
++      size_t c;
++
++      patterns_left = 0;
++      if (len < size)
++	{
++	  c = mbrtowc (&wc, buf + len, size - len, &mbs);
++	  if (c + 2 <= 2)
++	    return ret;
++
++	  wc = towlower (wc);
++	}
++      else
++	{
++	  c = 1;
++	  wc = L'\0';
++	}
++
++      for (i = 0; i < Fimb.count; i++)
++	{
++	  if (Fimb.match[i])
++	    {
++	      if (Fimb.patterns[i][letter] == L'\0')
++		{
++		  /* Found a match. */
++		  *plen = len;
++		  if (!exact && !match_words)
++		    return 0;
++		  else
++		    {
++		      /* For -w or exact look for longest match.  */
++		      ret = 0;
++		      Fimb.match[i] = '\0';
++		      continue;
++		    }
++		}
++
++	      if (Fimb.patterns[i][letter] == wc)
++		patterns_left = 1;
++	      else
++		Fimb.match[i] = '\0';
++	    }
++	}
++
++      len += c;
++      letter++;
++    }
++
++  return ret;
++}
++#endif /* MBS_SUPPORT */
++
+ static size_t
+ Fexecute (char const *buf, size_t size, size_t *match_size, int exact)
+ {
+@@ -506,88 +859,268 @@
+   register size_t len;
+   char eol = eolbyte;
+   struct kwsmatch kwsmatch;
++  size_t ret_val;
+ #ifdef MBS_SUPPORT
+-  char *mb_properties;
+-  if (MB_CUR_MAX > 1)
+-    mb_properties = check_multibyte_string (buf, size);
++  int mb_cur_max = MB_CUR_MAX;
++  mbstate_t mbs;
++  const char *last_char = NULL;	/* set to beg before each mbrlen step */
++  memset (&mbs, '\0', sizeof (mbstate_t));
+ #endif /* MBS_SUPPORT */
+ 
+   for (beg = buf; beg <= buf + size; ++beg)
+     {
+-      size_t offset = kwsexec (kwset, beg, buf + size - beg, &kwsmatch);
++      size_t offset;
++      offset = kwsexec (kwset, beg, buf + size - beg, &kwsmatch);
++
+       if (offset == (size_t) -1)
+-	{
++	goto failure;
+ #ifdef MBS_SUPPORT
+-	  if (MB_CUR_MAX > 1)
+-	    free(mb_properties);
+-#endif /* MBS_SUPPORT */
+-	  return offset;
++      if (mb_cur_max > 1 && !using_utf8)
++	{
++	  size_t bytes_left = offset;
++	  while (bytes_left)
++	    {
++	      size_t mlen = mbrlen (beg, bytes_left, &mbs);
++
++	      last_char = beg;
++	      if (mlen == (size_t) -1 || mlen == 0)
++		{
++		  /* Invalid sequence or NUL: treat as single-byte. */
++		  memset (&mbs, '\0', sizeof (mbstate_t));
++		  beg++;
++		  bytes_left--;
++		  continue;
++		}
++
++	      if (mlen == (size_t) -2)
++		/* Offset points inside multibyte character: no good. */
++		break;
++
++	      beg += mlen;
++	      bytes_left -= mlen;
++	    }
++
++	  if (bytes_left)
++	    continue;
+ 	}
+-#ifdef MBS_SUPPORT
+-      if (MB_CUR_MAX > 1 && mb_properties[offset+beg-buf] == 0)
+-	continue; /* It is a part of multibyte character.  */
++      else
+ #endif /* MBS_SUPPORT */
+       beg += offset;
+-      len = kwsmatch.size[0];
+-      if (exact)
+-	{
+-	  *match_size = len;
+ #ifdef MBS_SUPPORT
+-	  if (MB_CUR_MAX > 1)
+-	    free (mb_properties);
++      /* For f_i_multibyte, the string at beg now matches first 3 chars of
++	 one of the search strings (less if there are shorter search strings).
++	 See if this is a real match.  */
++      if (f_i_multibyte
++	  && Fimbexec (beg, buf + size - beg, &kwsmatch.size[0], exact))
++	goto next_char;
+ #endif /* MBS_SUPPORT */
+-	  return beg - buf;
+-	}
++      len = kwsmatch.size[0];
++      if (exact && !match_words)
++	goto success_in_beg_and_len;
+       if (match_lines)
+ 	{
+ 	  if (beg > buf && beg[-1] != eol)
+-	    continue;
++	    goto next_char;
+ 	  if (beg + len < buf + size && beg[len] != eol)
+-	    continue;
++	    goto next_char;
+ 	  goto success;
+ 	}
+       else if (match_words)
+-	for (try = beg; len; )
+-	  {
+-	    if (try > buf && WCHAR((unsigned char) try[-1]))
+-	      break;
+-	    if (try + len < buf + size && WCHAR((unsigned char) try[len]))
+-	      {
+-		offset = kwsexec (kwset, beg, --len, &kwsmatch);
+-		if (offset == (size_t) -1)
+-		  {
++	{
++	  while (len)
++	    {
++	      int word_match = 0;
++	      if (beg > buf)
++		{
+ #ifdef MBS_SUPPORT
+-		    if (MB_CUR_MAX > 1)
+-		      free (mb_properties);
++		  if (mb_cur_max > 1)
++		    {
++		      const char *s;
++		      int mr;
++		      wchar_t pwc;
++
++		      if (using_utf8)
++			{
++			  s = beg - 1;
++			  while (s > buf
++				 && (unsigned char) *s >= 0x80
++				 && (unsigned char) *s <= 0xbf)
++			    --s;
++			}
++		      else
++			s = last_char;
++		      mr = mbtowc (&pwc, s, beg - s);
++		      if (mr <= 0)
++			memset (&mbs, '\0', sizeof (mbstate_t));
++		      else if ((iswalnum (pwc) || pwc == L'_')
++			       && mr == (int) (beg - s))
++			goto next_char;
++		    }
++		  else
+ #endif /* MBS_SUPPORT */
+-		    return offset;
+-		  }
+-		try = beg + offset;
+-		len = kwsmatch.size[0];
+-	      }
+-	    else
+-	      goto success;
+-	  }
++		  if (WCHAR ((unsigned char) beg[-1]))
++		    goto next_char;
++		}
++#ifdef MBS_SUPPORT
++	      if (mb_cur_max > 1)
++		{
++		  wchar_t nwc;
++		  int mr;
++
++		  mr = mbtowc (&nwc, beg + len, buf + size - beg - len);
++		  if (mr <= 0)
++		    {
++		      memset (&mbs, '\0', sizeof (mbstate_t));
++		      word_match = 1;
++		    }
++		  else if (!iswalnum (nwc) && nwc != L'_')
++		    word_match = 1;
++		}
++	      else
++#endif /* MBS_SUPPORT */
++		if (beg + len >= buf + size || !WCHAR ((unsigned char) beg[len]))
++		  word_match = 1;
++	      if (word_match)
++		{
++		  if (!exact)
++		    /* Returns the whole line now we know there's a word match. */
++		    goto success;
++		  else
++		    /* Returns just this word match. */
++		    goto success_in_beg_and_len;
++		}
++	      if (len > 0)
++		{
++		  /* Try a shorter length anchored at the same place. */
++		  --len;
++		  offset = kwsexec (kwset, beg, len, &kwsmatch);
++
++		  if (offset == (size_t) -1)	/* kwsexec found nothing */
++		    goto next_char; /* Try a different anchor. */
++#ifdef MBS_SUPPORT
++		  if (mb_cur_max > 1 && !using_utf8)
++		    {
++		      size_t bytes_left = offset;
++		      while (bytes_left)
++			{
++			  size_t mlen = mbrlen (beg, bytes_left, &mbs);
++
++			  last_char = beg;
++			  if (mlen == (size_t) -1 || mlen == 0)
++			    {
++			      /* Invalid sequence or NUL: treat as single-byte. */
++			      memset (&mbs, '\0', sizeof (mbstate_t));
++			      beg++;
++			      bytes_left--;
++			      continue;
++			    }
++
++			  if (mlen == (size_t) -2)
++			    {
++			      /* Offset points inside multibyte character:
++			       * no good. */
++			      break;
++			    }
++
++			  beg += mlen;
++			  bytes_left -= mlen;
++			}
++
++		      if (bytes_left)
++			{
++			  memset (&mbs, '\0', sizeof (mbstate_t));
++			  goto next_char; /* Try a different anchor. */
++			}
++		    }
++		  else
++#endif /* MBS_SUPPORT */
++		  beg += offset;
++#ifdef MBS_SUPPORT
++		  /* The string at beg now matches first 3 chars of one of
++		     the search strings (less if there are shorter search
++		     strings).  See if this is a real match.  */
++		  if (f_i_multibyte
++		      && Fimbexec (beg, len - offset, &kwsmatch.size[0],
++				   exact))
++		    goto next_char;
++#endif /* MBS_SUPPORT */
++		  len = kwsmatch.size[0];
++		}
++	    }
++	}
+       else
+ 	goto success;
+-    }
+-
++next_char:;
+ #ifdef MBS_SUPPORT
+-  if (MB_CUR_MAX > 1)
+-    free (mb_properties);
++      /* Advance to next character.  For MB_CUR_MAX == 1 case this is handled
++	 by ++beg above.  */
++      if (mb_cur_max > 1)
++	{
++	  if (using_utf8)
++	    {
++	      unsigned char c = *beg;
++	      if (c >= 0xc2)
++		{
++		  if (c < 0xe0)
++		    ++beg;
++		  else if (c < 0xf0)
++		    beg += 2;
++		  else if (c < 0xf8)
++		    beg += 3;
++		  else if (c < 0xfc)
++		    beg += 4;
++		  else if (c < 0xfe)
++		    beg += 5;
++		}
++	    }
++	  else
++	    {
++	      size_t l = mbrlen (beg, buf + size - beg, &mbs);
++
++	      last_char = beg;
++	      if (l + 2 >= 2)
++		beg += l - 1;
++	      else
++		memset (&mbs, '\0', sizeof (mbstate_t));
++	    }
++	}
+ #endif /* MBS_SUPPORT */
++    }
++
++ failure:
+   return -1;
+ 
+  success:
++#ifdef MBS_SUPPORT
++  if (mb_cur_max > 1 && !using_utf8)
++    {
++      end = beg + len;
++      while (end < buf + size)
++	{
++	  size_t mlen = mbrlen (end, buf + size - end, &mbs);
++	  if (mlen == (size_t) -1 || mlen == (size_t) -2 || mlen == 0)
++	    {
++	      memset (&mbs, '\0', sizeof (mbstate_t));
++	      mlen = 1;
++	    }
++	  if (mlen == 1 && *end == eol)
++	    break;
++
++	  end += mlen;
++	}
++    }
++  else
++#endif /* MBS_SUPPORT */
+   end = memchr (beg + len, eol, (buf + size) - (beg + len));
++
+   end++;
+   while (buf < beg && beg[-1] != eol)
+     --beg;
+-  *match_size = end - beg;
+-#ifdef MBS_SUPPORT
+-  if (MB_CUR_MAX > 1)
+-    free (mb_properties);
+-#endif /* MBS_SUPPORT */
++  len = end - beg;
++  /* FALLTHROUGH */
++
++ success_in_beg_and_len:
++  *match_size = len;
+   return beg - buf;
+ }
+ 
+diff -urN grep-2.5.1a.orig/src/search.c.orig grep-2.5.1a/src/search.c.orig
+--- grep-2.5.1a.orig/src/search.c.orig	1970-01-01 05:00:00.000000000 +0500
++++ grep-2.5.1a/src/search.c.orig	2005-10-23 09:48:39.000000000 +0600
+@@ -0,0 +1,714 @@
++/* search.c - searching subroutines using dfa, kwset and regex for grep.
++   Copyright 1992, 1998, 2000 Free Software Foundation, Inc.
++
++   This program is free software; you can redistribute it and/or modify
++   it under the terms of the GNU General Public License as published by
++   the Free Software Foundation; either version 2, or (at your option)
++   any later version.
++
++   This program is distributed in the hope that it will be useful,
++   but WITHOUT ANY WARRANTY; without even the implied warranty of
++   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
++   GNU General Public License for more details.
++
++   You should have received a copy of the GNU General Public License
++   along with this program; if not, write to the Free Software
++   Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
++   02111-1307, USA.  */
++
++/* Written August 1992 by Mike Haertel. */
++
++#ifdef HAVE_CONFIG_H
++# include <config.h>
++#endif
++#include <sys/types.h>
++#if defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H && defined HAVE_MBRTOWC
++/* We can handle multibyte string.  */
++# define MBS_SUPPORT
++# include <wchar.h>
++# include <wctype.h>
++#endif
++
++#include "system.h"
++#include "grep.h"
++#include "regex.h"
++#include "dfa.h"
++#include "kwset.h"
++#include "error.h"
++#include "xalloc.h"
++#ifdef HAVE_LIBPCRE
++# include <pcre.h>
++#endif
++
++#define NCHAR (UCHAR_MAX + 1)
++
++/* For -w, we also consider _ to be word constituent.  */
++#define WCHAR(C) (ISALNUM(C) || (C) == '_')
++
++/* DFA compiled regexp. */
++static struct dfa dfa;
++
++/* The Regex compiled patterns.  */
++static struct patterns
++{
++  /* Regex compiled regexp. */
++  struct re_pattern_buffer regexbuf;
++  struct re_registers regs; /* This is here on account of a BRAIN-DEAD
++			       Q@#%!# library interface in regex.c.  */
++} patterns0;
++
++struct patterns *patterns;
++size_t pcount;
++
++/* KWset compiled pattern.  For Ecompile and Gcompile, we compile
++   a list of strings, at least one of which is known to occur in
++   any string matching the regexp. */
++static kwset_t kwset;
++
++/* Number of compiled fixed strings known to exactly match the regexp.
++   If kwsexec returns < kwset_exact_matches, then we don't need to
++   call the regexp matcher at all. */
++static int kwset_exact_matches;
++
++#if defined(MBS_SUPPORT)
++static char* check_multibyte_string PARAMS ((char const *buf, size_t size));
++#endif
++static void kwsinit PARAMS ((void));
++static void kwsmusts PARAMS ((void));
++static void Gcompile PARAMS ((char const *, size_t));
++static void Ecompile PARAMS ((char const *, size_t));
++static size_t EGexecute PARAMS ((char const *, size_t, size_t *, int ));
++static void Fcompile PARAMS ((char const *, size_t));
++static size_t Fexecute PARAMS ((char const *, size_t, size_t *, int));
++static void Pcompile PARAMS ((char const *, size_t ));
++static size_t Pexecute PARAMS ((char const *, size_t, size_t *, int));
++
++void
++dfaerror (char const *mesg)
++{
++  error (2, 0, mesg);
++}
++
++static void
++kwsinit (void)
++{
++  static char trans[NCHAR];
++  int i;
++
++  if (match_icase)
++    for (i = 0; i < NCHAR; ++i)
++      trans[i] = TOLOWER (i);
++
++  if (!(kwset = kwsalloc (match_icase ? trans : (char *) 0)))
++    error (2, 0, _("memory exhausted"));
++}
++
++/* If the DFA turns out to have some set of fixed strings one of
++   which must occur in the match, then we build a kwset matcher
++   to find those strings, and thus quickly filter out impossible
++   matches. */
++static void
++kwsmusts (void)
++{
++  struct dfamust const *dm;
++  char const *err;
++
++  if (dfa.musts)
++    {
++      kwsinit ();
++      /* First, we compile in the substrings known to be exact
++	 matches.  The kwset matcher will return the index
++	 of the matching string that it chooses. */
++      for (dm = dfa.musts; dm; dm = dm->next)
++	{
++	  if (!dm->exact)
++	    continue;
++	  ++kwset_exact_matches;
++	  if ((err = kwsincr (kwset, dm->must, strlen (dm->must))) != 0)
++	    error (2, 0, err);
++	}
++      /* Now, we compile the substrings that will require
++	 the use of the regexp matcher.  */
++      for (dm = dfa.musts; dm; dm = dm->next)
++	{
++	  if (dm->exact)
++	    continue;
++	  if ((err = kwsincr (kwset, dm->must, strlen (dm->must))) != 0)
++	    error (2, 0, err);
++	}
++      if ((err = kwsprep (kwset)) != 0)
++	error (2, 0, err);
++    }
++}
++
++#ifdef MBS_SUPPORT
++/* This function allocate the array which correspond to "buf".
++   Then this check multibyte string and mark on the positions which
++   are not singlebyte character nor the first byte of a multibyte
++   character.  Caller must free the array.  */
++static char*
++check_multibyte_string(char const *buf, size_t size)
++{
++  char *mb_properties = malloc(size);
++  mbstate_t cur_state;
++  int i;
++  memset(&cur_state, 0, sizeof(mbstate_t));
++  memset(mb_properties, 0, sizeof(char)*size);
++  for (i = 0; i < size ;)
++    {
++      size_t mbclen;
++      mbclen = mbrlen(buf + i, size - i, &cur_state);
++
++      if (mbclen == (size_t) -1 || mbclen == (size_t) -2 || mbclen == 0)
++	{
++	  /* An invalid sequence, or a truncated multibyte character.
++	     We treat it as a singlebyte character.  */
++	  mbclen = 1;
++	}
++      mb_properties[i] = mbclen;
++      i += mbclen;
++    }
++
++  return mb_properties;
++}
++#endif
++
++static void
++Gcompile (char const *pattern, size_t size)
++{
++  const char *err;
++  char const *sep;
++  size_t total = size;
++  char const *motif = pattern;
++
++  re_set_syntax (RE_SYNTAX_GREP | RE_HAT_LISTS_NOT_NEWLINE);
++  dfasyntax (RE_SYNTAX_GREP | RE_HAT_LISTS_NOT_NEWLINE, match_icase, eolbyte);
++
++  /* For GNU regex compiler we have to pass the patterns separately to detect
++     errors like "[\nallo\n]\n".  The patterns here are "[", "allo" and "]"
++     GNU regex should have raise a syntax error.  The same for backref, where
++     the backref should have been local to each pattern.  */
++  do
++    {
++      size_t len;
++      sep = memchr (motif, '\n', total);
++      if (sep)
++	{
++	  len = sep - motif;
++	  sep++;
++	  total -= (len + 1);
++	}
++      else
++	{
++	  len = total;
++	  total = 0;
++	}
++
++      patterns = realloc (patterns, (pcount + 1) * sizeof (*patterns));
++      if (patterns == NULL)
++	error (2, errno, _("memory exhausted"));
++
++      patterns[pcount] = patterns0;
++
++      if ((err = re_compile_pattern (motif, len,
++				    &(patterns[pcount].regexbuf))) != 0)
++	error (2, 0, err);
++      pcount++;
++
++      motif = sep;
++    } while (sep && total != 0);
++
++  /* In the match_words and match_lines cases, we use a different pattern
++     for the DFA matcher that will quickly throw out cases that won't work.
++     Then if DFA succeeds we do some hairy stuff using the regex matcher
++     to decide whether the match should really count. */
++  if (match_words || match_lines)
++    {
++      /* In the whole-word case, we use the pattern:
++	 \(^\|[^[:alnum:]_]\)\(userpattern\)\([^[:alnum:]_]|$\).
++	 In the whole-line case, we use the pattern:
++	 ^\(userpattern\)$.  */
++
++      static char const line_beg[] = "^\\(";
++      static char const line_end[] = "\\)$";
++      static char const word_beg[] = "\\(^\\|[^[:alnum:]_]\\)\\(";
++      static char const word_end[] = "\\)\\([^[:alnum:]_]\\|$\\)";
++      char *n = malloc (sizeof word_beg - 1 + size + sizeof word_end);
++      size_t i;
++      strcpy (n, match_lines ? line_beg : word_beg);
++      i = strlen (n);
++      memcpy (n + i, pattern, size);
++      i += size;
++      strcpy (n + i, match_lines ? line_end : word_end);
++      i += strlen (n + i);
++      pattern = n;
++      size = i;
++    }
++
++  dfacomp (pattern, size, &dfa, 1);
++  kwsmusts ();
++}
++
++static void
++Ecompile (char const *pattern, size_t size)
++{
++  const char *err;
++  const char *sep;
++  size_t total = size;
++  char const *motif = pattern;
++
++  if (strcmp (matcher, "awk") == 0)
++    {
++      re_set_syntax (RE_SYNTAX_AWK);
++      dfasyntax (RE_SYNTAX_AWK, match_icase, eolbyte);
++    }
++  else
++    {
++      re_set_syntax (RE_SYNTAX_POSIX_EGREP);
++      dfasyntax (RE_SYNTAX_POSIX_EGREP, match_icase, eolbyte);
++    }
++
++  /* For GNU regex compiler we have to pass the patterns separately to detect
++     errors like "[\nallo\n]\n".  The patterns here are "[", "allo" and "]"
++     GNU regex should have raise a syntax error.  The same for backref, where
++     the backref should have been local to each pattern.  */
++  do
++    {
++      size_t len;
++      sep = memchr (motif, '\n', total);
++      if (sep)
++	{
++	  len = sep - motif;
++	  sep++;
++	  total -= (len + 1);
++	}
++      else
++	{
++	  len = total;
++	  total = 0;
++	}
++
++      patterns = realloc (patterns, (pcount + 1) * sizeof (*patterns));
++      if (patterns == NULL)
++	error (2, errno, _("memory exhausted"));
++      patterns[pcount] = patterns0;
++
++      if ((err = re_compile_pattern (motif, len,
++				    &(patterns[pcount].regexbuf))) != 0)
++	error (2, 0, err);
++      pcount++;
++
++      motif = sep;
++    } while (sep && total != 0);
++
++  /* In the match_words and match_lines cases, we use a different pattern
++     for the DFA matcher that will quickly throw out cases that won't work.
++     Then if DFA succeeds we do some hairy stuff using the regex matcher
++     to decide whether the match should really count. */
++  if (match_words || match_lines)
++    {
++      /* In the whole-word case, we use the pattern:
++	 (^|[^[:alnum:]_])(userpattern)([^[:alnum:]_]|$).
++	 In the whole-line case, we use the pattern:
++	 ^(userpattern)$.  */
++
++      static char const line_beg[] = "^(";
++      static char const line_end[] = ")$";
++      static char const word_beg[] = "(^|[^[:alnum:]_])(";
++      static char const word_end[] = ")([^[:alnum:]_]|$)";
++      char *n = malloc (sizeof word_beg - 1 + size + sizeof word_end);
++      size_t i;
++      strcpy (n, match_lines ? line_beg : word_beg);
++      i = strlen(n);
++      memcpy (n + i, pattern, size);
++      i += size;
++      strcpy (n + i, match_lines ? line_end : word_end);
++      i += strlen (n + i);
++      pattern = n;
++      size = i;
++    }
++
++  dfacomp (pattern, size, &dfa, 1);
++  kwsmusts ();
++}
++
++static size_t
++EGexecute (char const *buf, size_t size, size_t *match_size, int exact)
++{
++  register char const *buflim, *beg, *end;
++  char eol = eolbyte;
++  int backref, start, len;
++  struct kwsmatch kwsm;
++  size_t i;
++#ifdef MBS_SUPPORT
++  char *mb_properties = NULL;
++#endif /* MBS_SUPPORT */
++
++#ifdef MBS_SUPPORT
++  if (MB_CUR_MAX > 1 && kwset)
++    mb_properties = check_multibyte_string(buf, size);
++#endif /* MBS_SUPPORT */
++
++  buflim = buf + size;
++
++  for (beg = end = buf; end < buflim; beg = end)
++    {
++      if (!exact)
++	{
++	  if (kwset)
++	    {
++	      /* Find a possible match using the KWset matcher. */
++	      size_t offset = kwsexec (kwset, beg, buflim - beg, &kwsm);
++	      if (offset == (size_t) -1)
++	        goto failure;
++	      beg += offset;
++	      /* Narrow down to the line containing the candidate, and
++		 run it through DFA. */
++	      end = memchr(beg, eol, buflim - beg);
++	      end++;
++#ifdef MBS_SUPPORT
++	      if (MB_CUR_MAX > 1 && mb_properties[beg - buf] == 0)
++		continue;
++#endif
++	      while (beg > buf && beg[-1] != eol)
++		--beg;
++	      if (kwsm.index < kwset_exact_matches)
++		goto success_in_beg_and_end;
++	      if (dfaexec (&dfa, beg, end - beg, &backref) == (size_t) -1)
++		continue;
++	    }
++	  else
++	    {
++	      /* No good fixed strings; start with DFA. */
++	      size_t offset = dfaexec (&dfa, beg, buflim - beg, &backref);
++	      if (offset == (size_t) -1)
++		break;
++	      /* Narrow down to the line we've found. */
++	      beg += offset;
++	      end = memchr (beg, eol, buflim - beg);
++	      end++;
++	      while (beg > buf && beg[-1] != eol)
++		--beg;
++	    }
++	  /* Successful, no backreferences encountered! */
++	  if (!backref)
++	    goto success_in_beg_and_end;
++	}
++      else
++	end = beg + size;
++
++      /* If we've made it to this point, this means DFA has seen
++	 a probable match, and we need to run it through Regex. */
++      for (i = 0; i < pcount; i++)
++	{
++	  patterns[i].regexbuf.not_eol = 0;
++	  if (0 <= (start = re_search (&(patterns[i].regexbuf), beg,
++				       end - beg - 1, 0,
++				       end - beg - 1, &(patterns[i].regs))))
++	    {
++	      len = patterns[i].regs.end[0] - start;
++	      if (exact && !match_words)
++	        goto success_in_start_and_len;
++	      if ((!match_lines && !match_words)
++		  || (match_lines && len == end - beg - 1))
++		goto success_in_beg_and_end;
++	      /* If -w, check if the match aligns with word boundaries.
++		 We do this iteratively because:
++		 (a) the line may contain more than one occurrence of the
++		 pattern, and
++		 (b) Several alternatives in the pattern might be valid at a
++		 given point, and we may need to consider a shorter one to
++		 find a word boundary.  */
++	      if (match_words)
++		while (start >= 0)
++		  {
++		    if ((start == 0 || !WCHAR ((unsigned char) beg[start - 1]))
++			&& (len == end - beg - 1
++			    || !WCHAR ((unsigned char) beg[start + len])))
++		      goto success_in_beg_and_end;
++		    if (len > 0)
++		      {
++			/* Try a shorter length anchored at the same place. */
++			--len;
++			patterns[i].regexbuf.not_eol = 1;
++			len = re_match (&(patterns[i].regexbuf), beg,
++					start + len, start,
++					&(patterns[i].regs));
++		      }
++		    if (len <= 0)
++		      {
++			/* Try looking further on. */
++			if (start == end - beg - 1)
++			  break;
++			++start;
++			patterns[i].regexbuf.not_eol = 0;
++			start = re_search (&(patterns[i].regexbuf), beg,
++					   end - beg - 1,
++					   start, end - beg - 1 - start,
++					   &(patterns[i].regs));
++			len = patterns[i].regs.end[0] - start;
++		      }
++		  }
++	    }
++	} /* for Regex patterns.  */
++    } /* for (beg = end ..) */
++
++ failure:
++#ifdef MBS_SUPPORT
++  if (MB_CUR_MAX > 1 && mb_properties)
++    free (mb_properties);
++#endif /* MBS_SUPPORT */
++  return (size_t) -1;
++
++ success_in_beg_and_end:
++  len = end - beg;
++  start = beg - buf;
++  /* FALLTHROUGH */
++
++ success_in_start_and_len:
++#ifdef MBS_SUPPORT
++  if (MB_CUR_MAX > 1 && mb_properties)
++    free (mb_properties);
++#endif /* MBS_SUPPORT */
++  *match_size = len;
++  return start;
++}
++
++static void
++Fcompile (char const *pattern, size_t size)
++{
++  char const *beg, *lim, *err;
++
++  kwsinit ();
++  beg = pattern;
++  do
++    {
++      for (lim = beg; lim < pattern + size && *lim != '\n'; ++lim)
++	;
++      if ((err = kwsincr (kwset, beg, lim - beg)) != 0)
++	error (2, 0, err);
++      if (lim < pattern + size)
++	++lim;
++      beg = lim;
++    }
++  while (beg < pattern + size);
++
++  if ((err = kwsprep (kwset)) != 0)
++    error (2, 0, err);
++}
++
++static size_t
++Fexecute (char const *buf, size_t size, size_t *match_size, int exact)
++{
++  register char const *beg, *try, *end;
++  register size_t len;
++  char eol = eolbyte;
++  struct kwsmatch kwsmatch;
++#ifdef MBS_SUPPORT
++  char *mb_properties;
++  if (MB_CUR_MAX > 1)
++    mb_properties = check_multibyte_string (buf, size);
++#endif /* MBS_SUPPORT */
++
++  for (beg = buf; beg <= buf + size; ++beg)
++    {
++      size_t offset = kwsexec (kwset, beg, buf + size - beg, &kwsmatch);
++      if (offset == (size_t) -1)
++	goto failure;
++#ifdef MBS_SUPPORT
++      if (MB_CUR_MAX > 1 && mb_properties[offset+beg-buf] == 0)
++	continue; /* It is a part of multibyte character.  */
++#endif /* MBS_SUPPORT */
++      beg += offset;
++      len = kwsmatch.size[0];
++      if (exact && !match_words)
++	goto success_in_beg_and_len;
++      if (match_lines)
++	{
++	  if (beg > buf && beg[-1] != eol)
++	    continue;
++	  if (beg + len < buf + size && beg[len] != eol)
++	    continue;
++	  goto success;
++	}
++      else if (match_words)
++	for (try = beg; len; )
++	  {
++	    if (try > buf && WCHAR((unsigned char) try[-1]))
++	      break;
++	    if (try + len < buf + size && WCHAR((unsigned char) try[len]))
++	      {
++		offset = kwsexec (kwset, beg, --len, &kwsmatch);
++		if (offset == (size_t) -1)
++		  {
++#ifdef MBS_SUPPORT
++		    if (MB_CUR_MAX > 1)
++		      free (mb_properties);
++#endif /* MBS_SUPPORT */
++		    return offset;
++		  }
++		try = beg + offset;
++		len = kwsmatch.size[0];
++	      }
++	    else
++	      goto success;
++	  }
++      else
++	goto success;
++    }
++
++ failure:
++#ifdef MBS_SUPPORT
++  if (MB_CUR_MAX > 1)
++    free (mb_properties);
++#endif /* MBS_SUPPORT */
++  return -1;
++
++ success:
++  end = memchr (beg + len, eol, (buf + size) - (beg + len));
++  end++;
++  while (buf < beg && beg[-1] != eol)
++    --beg;
++  len = end - beg;
++  /* FALLTHROUGH */
++
++ success_in_beg_and_len:
++  *match_size = len;
++#ifdef MBS_SUPPORT
++  if (MB_CUR_MAX > 1)
++    free (mb_properties);
++#endif /* MBS_SUPPORT */
++  return beg - buf;
++}
++
++#if HAVE_LIBPCRE
++/* Compiled internal form of a Perl regular expression.  */
++static pcre *cre;
++
++/* Additional information about the pattern.  */
++static pcre_extra *extra;
++#endif
++
++static void
++Pcompile (char const *pattern, size_t size)
++{
++#if !HAVE_LIBPCRE
++  error (2, 0, _("The -P option is not supported"));
++#else
++  int e;
++  char const *ep;
++  char *re = xmalloc (4 * size + 7);
++  int flags = PCRE_MULTILINE | (match_icase ? PCRE_CASELESS : 0);
++  char const *patlim = pattern + size;
++  char *n = re;
++  char const *p;
++  char const *pnul;
++
++  /* FIXME: Remove this restriction.  */
++  if (eolbyte != '\n')
++    error (2, 0, _("The -P and -z options cannot be combined"));
++
++  *n = '\0';
++  if (match_lines)
++    strcpy (n, "^(");
++  if (match_words)
++    strcpy (n, "\\b(");
++  n += strlen (n);
++
++  /* The PCRE interface doesn't allow NUL bytes in the pattern, so
++     replace each NUL byte in the pattern with the four characters
++     "\000", removing a preceding backslash if there are an odd
++     number of backslashes before the NUL.
++
++     FIXME: This method does not work with some multibyte character
++     encodings, notably Shift-JIS, where a multibyte character can end
++     in a backslash byte.  */
++  for (p = pattern; (pnul = memchr (p, '\0', patlim - p)); p = pnul + 1)
++    {
++      memcpy (n, p, pnul - p);
++      n += pnul - p;
++      for (p = pnul; pattern < p && p[-1] == '\\'; p--)
++	continue;
++      n -= (pnul - p) & 1;
++      strcpy (n, "\\000");
++      n += 4;
++    }
++
++  memcpy (n, p, patlim - p);
++  n += patlim - p;
++  *n = '\0';
++  if (match_words)
++    strcpy (n, ")\\b");
++  if (match_lines)
++    strcpy (n, ")$");
++
++  cre = pcre_compile (re, flags, &ep, &e, pcre_maketables ());
++  if (!cre)
++    error (2, 0, ep);
++
++  extra = pcre_study (cre, 0, &ep);
++  if (ep)
++    error (2, 0, ep);
++
++  free (re);
++#endif
++}
++
++static size_t
++Pexecute (char const *buf, size_t size, size_t *match_size, int exact)
++{
++#if !HAVE_LIBPCRE
++  abort ();
++  return -1;
++#else
++  /* This array must have at least two elements; everything after that
++     is just for performance improvement in pcre_exec.  */
++  int sub[300];
++
++  int e = pcre_exec (cre, extra, buf, size, 0, 0,
++		     sub, sizeof sub / sizeof *sub);
++
++  if (e <= 0)
++    {
++      switch (e)
++	{
++	case PCRE_ERROR_NOMATCH:
++	  return -1;
++
++	case PCRE_ERROR_NOMEMORY:
++	  error (2, 0, _("Memory exhausted"));
++
++	default:
++	  abort ();
++	}
++    }
++  else
++    {
++      /* Narrow down to the line we've found.  */
++      char const *beg = buf + sub[0];
++      char const *end = buf + sub[1];
++      char const *buflim = buf + size;
++      char eol = eolbyte;
++      if (!exact)
++	{
++	  end = memchr (end, eol, buflim - end);
++	  end++;
++	  while (buf < beg && beg[-1] != eol)
++	    --beg;
++	}
++
++      *match_size = end - beg;
++      return beg - buf;
++    }
++#endif
++}
++
++struct matcher const matchers[] = {
++  { "default", Gcompile, EGexecute },
++  { "grep", Gcompile, EGexecute },
++  { "egrep", Ecompile, EGexecute },
++  { "awk", Ecompile, EGexecute },
++  { "fgrep", Fcompile, Fexecute },
++  { "perl", Pcompile, Pexecute },
++  { "", 0, 0 },
++};
+diff -urN grep-2.5.1a.orig/tests/fmbtest.sh grep-2.5.1a/tests/fmbtest.sh
+--- grep-2.5.1a.orig/tests/fmbtest.sh	1970-01-01 05:00:00.000000000 +0500
++++ grep-2.5.1a/tests/fmbtest.sh	2005-10-23 09:51:12.000000000 +0600
+@@ -0,0 +1,111 @@
++#!/bin/sh
++
++: ${srcdir=.}
++
++# If cs_CZ.UTF-8 locale doesn't work, skip this test silently
++LC_ALL=cs_CZ.UTF-8 locale -k LC_CTYPE 2>/dev/null | ${GREP} -q charmap.*UTF-8 \
++  || exit 77
++
++failures=0
++
++cat > csinput <<EOF
++01 Žluťoučká číše
++ČíŠE 02
++03 Z číší Čiší cosi
++04 Čí
++Še 05
++06 ČČČČČČČíšČÍŠčíš
++07 ČČČ ČČČČíšČÍŠčíšEEEE
++čAs 08
++09Čapka
++10ČaSy se měnÍ
++ČÍšE11
++Čas12
++𝇕ČÍšE𝇓13
++ŽČÍšE𝇓14
++𝇕ČÍšEŽ15
++ŽČÍšEŽ16
++ČÍšE𝇓17
++ČÍšEŽ18
++19𝇕ČÍše
++20ŽČÍše
++EOF
++cat > cspatfile <<EOF
++ČÍšE
++Čas
++EOF
++
++for mode in F G E; do
++
++test1="$(echo `LC_ALL=cs_CZ.UTF-8 ${GREP} -${mode} -f cspatfile csinput \
++	       | LC_ALL=C sed 's/^.*\([0-9][0-9]\).*$/\1/'`)"
++if test "$test1" != "11 12 13 14 15 16 17 18"; then
++  echo "Test #1 ${mode} failed: $test1"
++  failures=1
++fi
++
++test2="$(echo `LC_ALL=cs_CZ.UTF-8 ${GREP} -${mode}i -f cspatfile csinput \
++	       | LC_ALL=C sed 's/^.*\([0-9][0-9]\).*$/\1/'`)"
++if test "$test2" != "01 02 07 08 10 11 12 13 14 15 16 17 18 19 20"; then
++  echo "Test #2 ${mode} failed: $test2"
++  failures=1
++fi
++
++test3="$(echo `LC_ALL=cs_CZ.UTF-8 ${GREP} -${mode}i -e 'ČÍšE' -e 'Čas' csinput \
++	       | LC_ALL=C sed 's/^.*\([0-9][0-9]\).*$/\1/'`)"
++if test "$test3" != "01 02 07 08 10 11 12 13 14 15 16 17 18 19 20"; then
++  echo "Test #3 ${mode} failed: $test3"
++  failures=1
++fi
++
++test4="$(echo `LC_ALL=cs_CZ.UTF-8 ${GREP} -${mode}iw -f cspatfile csinput \
++	       | LC_ALL=C sed 's/^.*\([0-9][0-9]\).*$/\1/'`)"
++if test "$test4" != "01 02 08 13 17 19"; then
++  echo "Test #4 ${mode} failed: $test4"
++  failures=1
++fi
++
++done
++
++# Test that -F --color=always prefers longer matches.
++test5="`echo 'Cosi tu ČišÍ...' \
++	| LC_ALL=cs_CZ.UTF-8 ${GREP} --color=always -Fi -e 'čiš' -e 'čiší'`"
++if echo "$test5" | LC_ALL=C ${GREP} -q 'Cosi tu .*\[.*mČišÍ.*\[.*m\(.\[K\)\?\.\.\.'; then
++  :
++else
++  echo "Test #5 F failed: $test5"
++  failures=1
++fi
++
++for mode in G E; do
++
++# Test that -{G,E} --color=always prefers earlier pattern matches.
++test6="`echo 'Cosi tu ČišÍ...' \
++	| LC_ALL=cs_CZ.UTF-8 ${GREP} --color=always -${mode}i -e 'čiš' -e 'čiší'`"
++if echo "$test6" | LC_ALL=C ${GREP} -q 'Cosi tu .*\[.*mČiš.*\[.*m\(.\[K\)\?Í\.\.\.'; then
++  :
++else
++  echo "Test #6 ${mode} failed: $test6"
++  failures=1
++fi
++
++# Test that -{G,E} --color=always prefers earlier pattern matches.
++test7="`echo 'Cosi tu ČišÍ...' \
++	| LC_ALL=cs_CZ.UTF-8 ${GREP} --color=always -${mode}i -e 'čiší' -e 'čiš'`"
++if echo "$test7" | LC_ALL=C ${GREP} -q 'Cosi tu .*\[.*mČišÍ.*\[.*m\(.\[K\)\?\.\.\.'; then
++  :
++else
++  echo "Test #7 ${mode} failed: $test7"
++  failures=1
++fi
++
++test8="$(echo `LC_ALL=cs_CZ.UTF-8 ${GREP} -${mode}i -e 'Č.šE' -e 'Č[a-f]s' csinput \
++	       | LC_ALL=C sed 's/^.*\([0-9][0-9]\).*$/\1/'`)"
++if test "$test8" != "01 02 07 08 10 11 12 13 14 15 16 17 18 19 20"; then
++  echo "Test #8 ${mode} failed: $test8"
++  failures=1
++fi
++
++done
++
++exit $failures
+diff -urN grep-2.5.1a.orig/tests/Makefile.am grep-2.5.1a/tests/Makefile.am
+--- grep-2.5.1a.orig/tests/Makefile.am	2001-03-07 09:11:27.000000000 +0500
++++ grep-2.5.1a/tests/Makefile.am	2005-10-23 09:51:12.000000000 +0600
+@@ -3,7 +3,8 @@
+ AWK=@AWK@
+ 
+ TESTS = warning.sh khadafy.sh spencer1.sh bre.sh ere.sh \
+-        status.sh empty.sh options.sh backref.sh file.sh
++        status.sh empty.sh options.sh backref.sh file.sh \
++        fmbtest.sh
+ EXTRA_DIST = $(TESTS) \
+              khadafy.lines khadafy.regexp \
+              spencer1.awk spencer1.tests \
+diff -urN grep-2.5.1a.orig/tests/Makefile.in grep-2.5.1a/tests/Makefile.in
+--- grep-2.5.1a.orig/tests/Makefile.in	2002-03-26 21:09:36.000000000 +0500
++++ grep-2.5.1a/tests/Makefile.in	2005-10-23 09:51:13.000000000 +0600
+@@ -97,7 +97,8 @@
+ AWK = @AWK@
+ 
+ TESTS = warning.sh khadafy.sh spencer1.sh bre.sh ere.sh \
+-        status.sh empty.sh options.sh backref.sh file.sh
++        status.sh empty.sh options.sh backref.sh file.sh \
++	fmbtest.sh
+ 
+ EXTRA_DIST = $(TESTS) \
+              khadafy.lines khadafy.regexp \

Added: trunk/linux/linux-2.6.12.5-utf8_input-2.patch
===================================================================
--- trunk/linux/linux-2.6.12.5-utf8_input-2.patch	                        (rev 0)
+++ trunk/linux/linux-2.6.12.5-utf8_input-2.patch	2006-01-25 21:40:50 UTC (rev 1372)
@@ -0,0 +1,379 @@
+Submitted by: Alexander E. Patrakov
+Date: 2005-08-23
+Initial Package Version: 2.6.12.5
+Upstream Status: Rejected: they say it modifies the meaning of an existing ioctl
+Origin: http://chris.heathens.co.nz/linux/downloads/patches-2.6.4-cdh1.tar.gz
+	Porting to linux-2.6.12.5 by Alexander E. Patrakov
+Description: This patch fixes dead keys and copy/paste of non-ASCII characters
+in UTF-8 mode on the Linux console.
+See more details about the original patch at:
+http://chris.heathens.co.nz/linux/utf8.html
+
+diff -ur linux-2.6.12.5.orig/drivers/char/consolemap.c linux-2.6.12.5/drivers/char/consolemap.c
+--- linux-2.6.12.5.orig/drivers/char/consolemap.c	2005-08-15 06:20:18.000000000 +0600
++++ linux-2.6.12.5/drivers/char/consolemap.c	2005-09-23 21:26:42.000000000 +0600
+@@ -178,6 +178,7 @@
+ 	unsigned long	refcount;
+ 	unsigned long	sum;
+ 	unsigned char	*inverse_translations[4];
++	u16		*inverse_trans_unicode;
+ 	int		readonly;
+ };
+ 
+@@ -208,6 +209,41 @@
+ 	}
+ }
+ 
++static void set_inverse_trans_unicode(struct vc_data *conp, 
++				      struct uni_pagedir *p)
++{
++	int i, j, k, glyph;
++	u16 **p1, *p2;
++	u16 *q;
++	
++	if (!p) return;
++	q = p->inverse_trans_unicode;
++	if (!q) {
++		q = p->inverse_trans_unicode =
++			kmalloc(MAX_GLYPH * sizeof(u16), GFP_KERNEL);
++		if (!q)
++			return;
++	}
++	memset(q, 0, MAX_GLYPH * sizeof(u16));
++
++	for (i = 0; i < 32; i++) {
++		p1 = p->uni_pgdir[i];
++		if (!p1)
++			continue;
++		for (j = 0; j < 32; j++) {
++			p2 = p1[j];
++			if (!p2)
++				continue;
++			for (k = 0; k < 64; k++) {
++				glyph = p2[k];
++				if (glyph >= 0 && glyph < MAX_GLYPH 
++					       && q[glyph] < 32)
++		  			q[glyph] = (i << 11) + (j << 6) + k;
++			}
++		}
++	}
++}
++
+ unsigned short *set_translate(int m, struct vc_data *vc)
+ {
+ 	inv_translate[vc->vc_num] = m;
+@@ -218,19 +254,29 @@
+  * Inverse translation is impossible for several reasons:
+  * 1. The font<->character maps are not 1-1.
+  * 2. The text may have been written while a different translation map
+- *    was active, or using Unicode.
++ *    was active.
+  * Still, it is now possible to a certain extent to cut and paste non-ASCII.
+  */
+-unsigned char inverse_translate(struct vc_data *conp, int glyph)
++u16 inverse_translate(struct vc_data *conp, int glyph, int use_unicode)
+ {
+ 	struct uni_pagedir *p;
++	int m;
+ 	if (glyph < 0 || glyph >= MAX_GLYPH)
+ 		return 0;
+-	else if (!(p = (struct uni_pagedir *)*conp->vc_uni_pagedir_loc) ||
+-		 !p->inverse_translations[inv_translate[conp->vc_num]])
++	else if (!(p = (struct uni_pagedir *)*conp->vc_uni_pagedir_loc))
+ 		return glyph;
+-	else
+-		return p->inverse_translations[inv_translate[conp->vc_num]][glyph];
++	else if (use_unicode) {
++		if (!p->inverse_trans_unicode)
++			return glyph;
++		else
++			return p->inverse_trans_unicode[glyph];
++	} else {
++		m = inv_translate[conp->vc_num];
++		if (!p->inverse_translations[m])
++			return glyph;
++		else
++			return p->inverse_translations[m][glyph];
++	}
+ }
+ 
+ static void update_user_maps(void)
+@@ -244,6 +290,7 @@
+ 		p = (struct uni_pagedir *)*vc_cons[i].d->vc_uni_pagedir_loc;
+ 		if (p && p != q) {
+ 			set_inverse_transl(vc_cons[i].d, p, USER_MAP);
++			set_inverse_trans_unicode(vc_cons[i].d, p);
+ 			q = p;
+ 		}
+ 	}
+@@ -356,6 +403,10 @@
+ 			kfree(p->inverse_translations[i]);
+ 			p->inverse_translations[i] = NULL;
+ 		}
++	if (p->inverse_trans_unicode) {
++		kfree(p->inverse_trans_unicode);
++		p->inverse_trans_unicode = NULL;
++	}
+ }
+ 
+ void con_free_unimap(struct vc_data *vc)
+@@ -514,6 +565,7 @@
+ 
+ 	for (i = 0; i <= 3; i++)
+ 		set_inverse_transl(vc, p, i); /* Update all inverse translations */
++	set_inverse_trans_unicode(vc, p);
+   
+ 	return err;
+ }
+@@ -564,6 +616,7 @@
+ 
+ 	for (i = 0; i <= 3; i++)
+ 		set_inverse_transl(vc, p, i);	/* Update all inverse translations */
++	set_inverse_trans_unicode(vc, p);
+ 	dflt = p;
+ 	return err;
+ }
+@@ -620,6 +673,19 @@
+ 		p->readonly = rdonly;
+ }
+ 
++/* may be called during an interrupt */
++u32 conv_8bit_to_uni(unsigned char c)
++{
++	/* 
++	 * Always use USER_MAP. This function is used by the keyboard,
++	 * which shouldn't be affected by G0/G1 switching, etc.
++	 * If the user map still contains default values, i.e. the 
++	 * direct-to-font mapping, then assume user is using Latin1.
++	 */
++	unsigned short uni = translations[USER_MAP][c];
++	return uni == (0xf000 | c) ? c : uni;
++}
++
+ int
+ conv_uni_to_pc(struct vc_data *conp, long ucs) 
+ {
+diff -ur linux-2.6.12.5.orig/drivers/char/keyboard.c linux-2.6.12.5/drivers/char/keyboard.c
+--- linux-2.6.12.5.orig/drivers/char/keyboard.c	2005-08-15 06:20:18.000000000 +0600
++++ linux-2.6.12.5/drivers/char/keyboard.c	2005-09-23 18:44:35.000000000 +0600
+@@ -34,6 +34,7 @@
+ #include <linux/init.h>
+ #include <linux/slab.h>
+ 
++#include <linux/consolemap.h>
+ #include <linux/kbd_kern.h>
+ #include <linux/kbd_diacr.h>
+ #include <linux/vt_kern.h>
+@@ -330,10 +331,9 @@
+  * Many other routines do put_queue, but I think either
+  * they produce ASCII, or they produce some user-assigned
+  * string, and in both cases we might assume that it is
+- * in utf-8 already. UTF-8 is defined for words of up to 31 bits,
+- * but we need only 16 bits here
++ * in utf-8 already.
+  */
+-static void to_utf8(struct vc_data *vc, ushort c)
++static void to_utf8(struct vc_data *vc, uint c)
+ {
+ 	if (c < 0x80)
+ 		/*  0******* */
+@@ -342,14 +342,33 @@
+ 		/* 110***** 10****** */
+ 		put_queue(vc, 0xc0 | (c >> 6)); 
+ 		put_queue(vc, 0x80 | (c & 0x3f));
+-    	} else {
++    	} else if (c < 0x10000) {
++	       	if (c >= 0xD800 && c < 0xE000)
++			return;
++		if (c == 0xFFFF)
++			return;
+ 		/* 1110**** 10****** 10****** */
+ 		put_queue(vc, 0xe0 | (c >> 12));
+ 		put_queue(vc, 0x80 | ((c >> 6) & 0x3f));
+ 		put_queue(vc, 0x80 | (c & 0x3f));
++    	} else if (c < 0x110000) {
++		/* 11110*** 10****** 10****** 10****** */
++		put_queue(vc, 0xf0 | (c >> 18));
++		put_queue(vc, 0x80 | ((c >> 12) & 0x3f));
++		put_queue(vc, 0x80 | ((c >> 6) & 0x3f));
++		put_queue(vc, 0x80 | (c & 0x3f));
+     	}
+ }
+ 
++static void put_8bit(struct vc_data *vc, u8 c)
++{
++	if (kbd->kbdmode != VC_UNICODE || c < 32 || c == 127) 
++		/* Don't translate control chars */
++		put_queue(vc, c);
++	else
++		to_utf8(vc, conv_8bit_to_uni(c));
++}
++
+ /* 
+  * Called after returning from RAW mode or when changing consoles - recompute
+  * shift_down[] and shift_state from key_down[] maybe called when keymap is
+@@ -410,7 +429,7 @@
+ 	if (ch == ' ' || ch == d)
+ 		return d;
+ 
+-	put_queue(vc, d);
++	put_8bit(vc, d);
+ 	return ch;
+ }
+ 
+@@ -420,7 +439,7 @@
+ static void fn_enter(struct vc_data *vc, struct pt_regs *regs)
+ {
+ 	if (diacr) {
+-		put_queue(vc, diacr);
++		put_8bit(vc, diacr);
+ 		diacr = 0;
+ 	}
+ 	put_queue(vc, 13);
+@@ -629,7 +648,7 @@
+ 		diacr = value;
+ 		return;
+ 	}
+-	put_queue(vc, value);
++	put_8bit(vc, value);
+ }
+ 
+ /*
+@@ -775,7 +794,7 @@
+ 	/* kludge */
+ 	if (up_flag && shift_state != old_state && npadch != -1) {
+ 		if (kbd->kbdmode == VC_UNICODE)
+-			to_utf8(vc, npadch & 0xffff);
++			to_utf8(vc, npadch);
+ 		else
+ 			put_queue(vc, npadch & 0xff);
+ 		npadch = -1;
+diff -ur linux-2.6.12.5.orig/drivers/char/selection.c linux-2.6.12.5/drivers/char/selection.c
+--- linux-2.6.12.5.orig/drivers/char/selection.c	2005-08-15 06:20:18.000000000 +0600
++++ linux-2.6.12.5/drivers/char/selection.c	2005-09-23 19:25:21.000000000 +0600
+@@ -20,6 +20,7 @@
+ 
+ #include <asm/uaccess.h>
+ 
++#include <linux/kbd_kern.h>
+ #include <linux/vt_kern.h>
+ #include <linux/consolemap.h>
+ #include <linux/selection.h>
+@@ -34,6 +35,7 @@
+ /* Variables for selection control. */
+ /* Use a dynamic buffer, instead of static (Dec 1994) */
+ struct vc_data *sel_cons;		/* must not be disallocated */
++static int use_unicode;
+ static volatile int sel_start = -1; 	/* cleared by clear_selection */
+ static int sel_end;
+ static int sel_buffer_lth;
+@@ -54,10 +56,11 @@
+ 	complement_pos(sel_cons, where);
+ }
+ 
+-static unsigned char
++static u16
+ sel_pos(int n)
+ {
+-	return inverse_translate(sel_cons, screen_glyph(sel_cons, n));
++	return inverse_translate(sel_cons, screen_glyph(sel_cons, n),
++				use_unicode);
+ }
+ 
+ /* remove the current selection highlight, if any,
+@@ -86,8 +89,8 @@
+   0xFF7FFFFF  /* latin-1 accented letters, not division sign */
+ };
+ 
+-static inline int inword(const unsigned char c) {
+-	return ( inwordLut[c>>5] >> (c & 0x1F) ) & 1;
++static inline int inword(const u16 c) {
++	return c > 0xff || (( inwordLut[c>>5] >> (c & 0x1F) ) & 1);
+ }
+ 
+ /* set inwordLut contents. Invoked by ioctl(). */
+@@ -108,13 +111,36 @@
+ 	return (v > u) ? u : v;
+ }
+ 
++/* stores the char in UTF8 and returns the number of bytes used (1-3) */
++int store_utf8(u16 c, char *p) 
++{
++	if (c < 0x80) {
++		/*  0******* */
++		p[0] = c;
++		return 1;
++	} else if (c < 0x800) {
++		/* 110***** 10****** */
++		p[0] = 0xc0 | (c >> 6);
++		p[1] = 0x80 | (c & 0x3f);
++		return 2;
++    	} else {
++		/* 1110**** 10****** 10****** */
++		p[0] = 0xe0 | (c >> 12);
++		p[1] = 0x80 | ((c >> 6) & 0x3f);
++		p[2] = 0x80 | (c & 0x3f);
++		return 3;
++    	}
++}
++
+ /* set the current selection. Invoked by ioctl() or by kernel code. */
+ int set_selection(const struct tiocl_selection __user *sel, struct tty_struct *tty)
+ {
+ 	struct vc_data *vc = vc_cons[fg_console].d;
+ 	int sel_mode, new_sel_start, new_sel_end, spc;
+ 	char *bp, *obp;
+-	int i, ps, pe;
++	int i, ps, pe, multiplier;
++	u16 c;
++	struct kbd_struct *kbd = kbd_table + fg_console;
+ 
+ 	poke_blanked_console();
+ 
+@@ -158,7 +184,8 @@
+ 		clear_selection();
+ 		sel_cons = vc_cons[fg_console].d;
+ 	}
+-
++	use_unicode = kbd && kbd->kbdmode == VC_UNICODE;
++	
+ 	switch (sel_mode)
+ 	{
+ 		case TIOCL_SELCHAR:	/* character-by-character selection */
+@@ -240,7 +267,8 @@
+ 	sel_end = new_sel_end;
+ 
+ 	/* Allocate a new buffer before freeing the old one ... */
+-	bp = kmalloc((sel_end-sel_start)/2+1, GFP_KERNEL);
++	multiplier = use_unicode ? 3 : 1;  /* each cell can take up to 3 UTF-8 bytes */
++	bp = kmalloc(((sel_end-sel_start)/2+1)*multiplier, GFP_KERNEL);
+ 	if (!bp) {
+ 		printk(KERN_WARNING "selection: kmalloc() failed\n");
+ 		clear_selection();
+@@ -252,8 +280,12 @@
+ 
+ 	obp = bp;
+ 	for (i = sel_start; i <= sel_end; i += 2) {
+-		*bp = sel_pos(i);
+-		if (!isspace(*bp++))
++		c = sel_pos(i);
++		if (use_unicode)
++			bp += store_utf8(c, bp);
++		else
++			*bp++ = c;
++		if (!isspace(c))
+ 			obp = bp;
+ 		if (! ((i + 2) % vc->vc_size_row)) {
+ 			/* strip trailing blanks from line and add newline,
+diff -ur linux-2.6.12.5.orig/include/linux/consolemap.h linux-2.6.12.5/include/linux/consolemap.h
+--- linux-2.6.12.5.orig/include/linux/consolemap.h	2005-08-15 06:20:18.000000000 +0600
++++ linux-2.6.12.5/include/linux/consolemap.h	2005-09-23 19:49:09.000000000 +0600
+@@ -10,6 +10,7 @@
+ 
+ struct vc_data;
+ 
+-extern unsigned char inverse_translate(struct vc_data *conp, int glyph);
++extern u16 inverse_translate(struct vc_data *conp, int glyph, int use_unicode);
+ extern unsigned short *set_translate(int m, struct vc_data *vc);
+ extern int conv_uni_to_pc(struct vc_data *conp, long ucs);
++extern u32 conv_8bit_to_uni(unsigned char c);




More information about the patches mailing list