001    /* CollationElementIterator.java -- Walks through collation elements
002       Copyright (C) 1998, 1999, 2001, 2002, 2003, 2004  Free Software Foundation
003    
004    This file is part of GNU Classpath.
005    
006    GNU Classpath is free software; you can redistribute it and/or modify
007    it under the terms of the GNU General Public License as published by
008    the Free Software Foundation; either version 2, or (at your option)
009    any later version.
010     
011    GNU Classpath is distributed in the hope that it will be useful, but
012    WITHOUT ANY WARRANTY; without even the implied warranty of
013    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
014    General Public License for more details.
015    
016    You should have received a copy of the GNU General Public License
017    along with GNU Classpath; see the file COPYING.  If not, write to the
018    Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
019    02110-1301 USA.
020    
021    Linking this library statically or dynamically with other modules is
022    making a combined work based on this library.  Thus, the terms and
023    conditions of the GNU General Public License cover the whole
024    combination.
025    
026    As a special exception, the copyright holders of this library give you
027    permission to link this library with independent modules to produce an
028    executable, regardless of the license terms of these independent
029    modules, and to copy and distribute the resulting executable under
030    terms of your choice, provided that you also meet, for each linked
031    independent module, the terms and conditions of the license of that
032    module.  An independent module is a module which is not derived from
033    or based on this library.  If you modify this library, you may extend
034    this exception to your version of the library, but you are not
035    obligated to do so.  If you do not wish to do so, delete this
036    exception statement from your version. */
037    
038    
039    package java.text;
040    
041    import java.util.ArrayList;
042    
043    /* Written using "Java Class Libraries", 2nd edition, plus online
044     * API docs for JDK 1.2 from http://www.javasoft.com.
045     * Status: Believed complete and correct to JDK 1.1.
046     */
047    
048    /**
049     * This class walks through the character collation elements of a 
050     * <code>String</code> as defined by the collation rules in an instance of 
051     * <code>RuleBasedCollator</code>.  There is no public constructor for
052     * this class.  An instance is created by calling the
053     * <code>getCollationElementIterator</code> method on 
054     * <code>RuleBasedCollator</code>.
055     *
056     * @author Aaron M. Renn (arenn@urbanophile.com)
057     * @author Tom Tromey (tromey@cygnus.com)
058     * @author Guilhem Lavaux (guilhem.lavaux@free.fr)
059     */
060    public final class CollationElementIterator
061    {
062      /**
063       * This is a constant value that is returned to indicate that the end of 
064       * the string was encountered.
065       */
066      public static final int NULLORDER = -1;
067    
068      /**
069       * This is the RuleBasedCollator this object was created from.
070       */
071      RuleBasedCollator collator;
072    
073      /**
074       * This is the String that is being iterated over.
075       */
076      String text;
077    
078      /**
079       * This is the index into the collation decomposition where we are currently scanning.
080       */
081      int index;
082    
083      /**
084       * This is the index into the String where we are currently scanning.
085       */
086      int textIndex;
087    
088      /**
089       * Array containing the collation decomposition of the
090       * text given to the constructor.
091       */
092      private RuleBasedCollator.CollationElement[] text_decomposition;
093    
094      /**
095       * Array containing the index of the specified block.
096       */
097      private int[] text_indexes;
098    
099      /**
100       * This method initializes a new instance of <code>CollationElementIterator</code>
101       * to iterate over the specified <code>String</code> using the rules in the
102       * specified <code>RuleBasedCollator</code>.
103       *
104       * @param collator The <code>RuleBasedCollation</code> used for calculating collation values
105       * @param text The <code>String</code> to iterate over.
106       */
107      CollationElementIterator(RuleBasedCollator collator, String text)
108      {
109        this.collator = collator;
110        
111        setText (text);    
112      }
113    
114      RuleBasedCollator.CollationElement nextBlock()
115      {
116        if (index >= text_decomposition.length)
117          return null;
118        
119        RuleBasedCollator.CollationElement e = text_decomposition[index];
120        
121        textIndex = text_indexes[index+1];
122    
123        index++;
124    
125        return e;
126      }
127    
128      RuleBasedCollator.CollationElement previousBlock()
129      {
130        if (index == 0)
131          return null;
132        
133        index--;
134        RuleBasedCollator.CollationElement e = text_decomposition[index];
135    
136        textIndex = text_indexes[index+1];
137        
138        return e;
139      }
140    
141      /**
142       * This method returns the collation ordering value of the next character sequence
143       * in the string (it may be an extended character following collation rules).
144       * This method will return <code>NULLORDER</code> if the
145       * end of the string was reached.
146       *
147       * @return The collation ordering value.
148       */
149      public int next()
150      {
151        RuleBasedCollator.CollationElement e = nextBlock();
152    
153        if (e == null)
154          return NULLORDER;
155        
156        return e.getValue();
157      }
158    
159      /**
160       * This method returns the collation ordering value of the previous character
161       * in the string.  This method will return <code>NULLORDER</code> if the
162       * beginning of the string was reached.
163       *
164       * @return The collation ordering value.
165       */
166      public int previous()
167      {
168        RuleBasedCollator.CollationElement e = previousBlock();
169    
170        if (e == null)
171          return NULLORDER;
172        
173        return e.getValue();
174      }
175    
176      /**
177       * This method returns the primary order value for the given collation
178       * value.
179       *
180       * @param order The collation value returned from <code>next()</code> or 
181       *              <code>previous()</code>.
182       *
183       * @return The primary order value of the specified collation value.  This is
184       *         the high 16 bits.
185       */
186      public static int primaryOrder(int order)
187      {
188        // From the JDK 1.2 spec.
189        return order >>> 16;
190      }
191    
192      /**
193       * This method resets the internal position pointer to read from the
194       * beginning of the <code>String</code> again.
195       */
196      public void reset()
197      {
198        index = 0;
199        textIndex = 0;
200      }
201    
202      /**
203       * This method returns the secondary order value for the given collation
204       * value.
205       *
206       * @param order The collation value returned from <code>next()</code> or 
207       *              <code>previous()</code>.
208       *
209       * @return The secondary order value of the specified collation value.  This 
210       *         is the bits 8-15.
211       */
212      public static short secondaryOrder(int order)
213      {
214        // From the JDK 1.2 spec.
215        return (short) ((order >>> 8) & 255);
216      }
217    
218      /**
219       * This method returns the tertiary order value for the given collation
220       * value.
221       *
222       * @param order The collation value returned from <code>next()</code> or 
223       *              <code>previous()</code>.
224       *
225       * @return The tertiary order value of the specified collation value.  This 
226       *         is the low eight bits.
227       */
228      public static short tertiaryOrder(int order)
229      {
230        // From the JDK 1.2 spec.
231        return (short) (order & 255);
232      }
233    
234      /**
235       * This method sets the <code>String</code> that it is iterating over
236       * to the specified <code>String</code>.
237       *
238       * @param text The new <code>String</code> to iterate over.
239       *
240       * @since 1.2
241       */
242      public void setText(String text)
243      {
244        int idx = 0;
245        int idx_idx = 0;
246        int alreadyExpanded = 0;
247        int idxToMove = 0;
248    
249        this.text = text;
250        this.index = 0;
251    
252        String work_text = text.intern();
253    
254        ArrayList a_element = new ArrayList();
255        ArrayList a_idx = new ArrayList();
256    
257        // Build element collection ordered as they come in "text".
258        while (idx < work_text.length())
259          {
260            String key, key_old;
261    
262            Object object = null;
263            int p = 1;
264            
265            // IMPROVE: use a TreeMap with a prefix-ordering rule.
266            key_old = key = null;
267            do
268              {
269                if (object != null)
270                  key_old = key;
271                key = work_text.substring (idx, idx+p);
272                object = collator.prefix_tree.get (key);
273                if (object != null && idx < alreadyExpanded)
274                  {
275                    RuleBasedCollator.CollationElement prefix = (RuleBasedCollator.CollationElement)object;
276                    if (prefix.expansion != null && 
277                        prefix.expansion.startsWith(work_text.substring(0, idx)))
278                    {
279                      object = null;
280                      key = key_old;
281                    }
282                  }
283                p++;
284              }
285            while (idx+p <= work_text.length());
286            
287            if (object == null)
288              key = key_old;
289            
290            RuleBasedCollator.CollationElement prefix =
291              (RuleBasedCollator.CollationElement) collator.prefix_tree.get (key);
292    
293            /*
294             * First case: There is no such sequence in the database.
295             * We will have to build one from the context.
296             */
297            if (prefix == null)
298              {
299                /*
300                 * We are dealing with sequences in an expansion. They
301                 * are treated as accented characters (tertiary order).
302                 */
303                if (alreadyExpanded > 0)
304                  {
305                    RuleBasedCollator.CollationElement e =
306                      collator.getDefaultAccentedElement (work_text.charAt (idx));
307                    
308                    a_element.add (e);
309                    a_idx.add (new Integer(idx_idx));
310                    idx++;
311                    alreadyExpanded--;
312                    if (alreadyExpanded == 0)
313                      {
314                        /* There is not any characters left in the expansion set.
315                         * We can increase the pointer in the source string.
316                         */
317                        idx_idx += idxToMove;
318                        idxToMove = 0; 
319                      }
320                    else
321                      idx_idx++;
322                  }
323                else
324                  {
325                    /* This is a normal character. */
326                    RuleBasedCollator.CollationElement e =
327                      collator.getDefaultElement (work_text.charAt (idx));
328                    Integer i_ref = new Integer(idx_idx);
329    
330                    /* Don't forget to mark it as a special sequence so the
331                     * string can be ordered.
332                     */
333                    a_element.add (RuleBasedCollator.SPECIAL_UNKNOWN_SEQ);
334                    a_idx.add (i_ref);
335                    a_element.add (e);
336                    a_idx.add (i_ref);
337                    idx_idx++;
338                    idx++;
339                  }
340                continue;
341              }
342     
343            /*
344             * Second case: Here we have found a matching sequence.
345             * Here we have an expansion string prepend it to the "work text" and
346             * add the corresponding sorting element. We must also mark 
347             */
348            if (prefix.expansion != null)
349              {
350                work_text = prefix.expansion
351                  + work_text.substring (idx+prefix.key.length());
352                idx = 0;
353                a_element.add (prefix);
354                a_idx.add (new Integer(idx_idx));
355                if (alreadyExpanded == 0)
356                  idxToMove = prefix.key.length();
357                alreadyExpanded += prefix.expansion.length()-prefix.key.length();
358              }
359            else
360              {
361                /* Third case: the simplest. We have got the prefix and it
362                 * has not to be expanded.
363                 */
364                a_element.add (prefix);
365                a_idx.add (new Integer(idx_idx));
366                idx += prefix.key.length();
367                /* If the sequence is in an expansion, we must decrease the
368                 * counter.
369                 */
370                if (alreadyExpanded > 0)
371                  {
372                    alreadyExpanded -= prefix.key.length();
373                    if (alreadyExpanded == 0)
374                      {
375                        idx_idx += idxToMove;
376                        idxToMove = 0;
377                      }
378                  }
379                else
380                  idx_idx += prefix.key.length();
381              }
382          }
383        
384        text_decomposition = (RuleBasedCollator.CollationElement[])
385               a_element.toArray(new RuleBasedCollator.CollationElement[a_element.size()]);
386        text_indexes = new int[a_idx.size()+1];
387        for (int i = 0; i < a_idx.size(); i++) 
388          {
389            text_indexes[i] = ((Integer)a_idx.get(i)).intValue();
390          }
391        text_indexes[a_idx.size()] = text.length();
392      }
393    
394      /**
395       * This method sets the <code>String</code> that it is iterating over
396       * to the <code>String</code> represented by the specified
397       * <code>CharacterIterator</code>.
398       *
399       * @param source The <code>CharacterIterator</code> containing the new
400       * <code>String</code> to iterate over.
401       */
402      public void setText(CharacterIterator source)
403      {
404        StringBuffer expand = new StringBuffer();
405    
406        // For now assume we read from the beginning of the string.
407        for (char c = source.first();
408             c != CharacterIterator.DONE;
409             c = source.next())
410          expand.append(c);
411    
412        setText(expand.toString());
413      }
414    
415      /**
416       * This method returns the current offset into the <code>String</code>
417       * that is being iterated over.
418       *
419       * @return The iteration index position.
420       *
421       * @since 1.2
422       */
423      public int getOffset()
424      {
425        return textIndex;
426      }
427    
428      /**
429       * This method sets the iteration index position into the current
430       * <code>String</code> to the specified value.  This value must not
431       * be negative and must not be greater than the last index position
432       * in the <code>String</code>.
433       *
434       * @param offset The new iteration index position.
435       *
436       * @exception IllegalArgumentException If the new offset is not valid.
437       */
438      public void setOffset(int offset)
439      {
440        if (offset < 0)
441          throw new IllegalArgumentException("Negative offset: " + offset);
442    
443        if (offset > (text.length() - 1))
444          throw new IllegalArgumentException("Offset too large: " + offset);
445        
446        for (index = 0; index < text_decomposition.length; index++)
447          { 
448            if (offset <= text_indexes[index])
449              break;
450          }
451        /*
452         * As text_indexes[0] == 0, we should not have to take care whether index is
453         * greater than 0. It is always.
454         */
455        if (text_indexes[index] == offset)
456          textIndex = offset;
457        else
458          textIndex = text_indexes[index-1];
459      }
460    
461      /**
462       * This method returns the maximum length of any expansion sequence that
463       * ends with the specified collation order value.  (Whatever that means).
464       *
465       * @param value The collation order value
466       *
467       * @return The maximum length of an expansion sequence.
468       */
469      public int getMaxExpansion(int value)
470      {
471        return 1;
472      }
473    }