into - split string with length in c#




Splitting a string into chunks of a certain size (20)

Six years later o_O

Just because

    public static IEnumerable<string> Split(this string str, int chunkSize, bool remainingInFront)
    {
        var count = (int) Math.Ceiling(str.Length/(double) chunkSize);
        Func<int, int> start = index => remainingInFront ? str.Length - (count - index)*chunkSize : index*chunkSize;
        Func<int, int> end = index => Math.Min(str.Length - Math.Max(start(index), 0), Math.Min(start(index) + chunkSize - Math.Max(start(index), 0), chunkSize));
        return Enumerable.Range(0, count).Select(i => str.Substring(Math.Max(start(i), 0),end(i)));
    }

or

    private static Func<bool, int, int, int, int, int> start = (remainingInFront, length, count, index, size) =>
        remainingInFront ? length - (count - index) * size : index * size;

    private static Func<bool, int, int, int, int, int, int> end = (remainingInFront, length, count, index, size, start) =>
        Math.Min(length - Math.Max(start, 0), Math.Min(start + size - Math.Max(start, 0), size));

    public static IEnumerable<string> Split(this string str, int chunkSize, bool remainingInFront)
    {
        var count = (int)Math.Ceiling(str.Length / (double)chunkSize);
        return Enumerable.Range(0, count).Select(i => str.Substring(
            Math.Max(start(remainingInFront, str.Length, count, i, chunkSize), 0),
            end(remainingInFront, str.Length, count, i, chunkSize, start(remainingInFront, str.Length, count, i, chunkSize))
        ));
    }

AFAIK all edge cases are handled.

Console.WriteLine(string.Join(" ", "abc".Split(2, false))); // ab c
Console.WriteLine(string.Join(" ", "abc".Split(2, true))); // a bc
Console.WriteLine(string.Join(" ", "a".Split(2, true))); // a
Console.WriteLine(string.Join(" ", "a".Split(2, false))); // a

Suppose I had a string:

string str = "1111222233334444"; 

How can I break this string into chunks of some size?

e.g., breaking this into sizes of 4 would return strings:

"1111"
"2222"
"3333"
"4444"

An important tip if the string that is being chunked needs to support all Unicode characters.

If the string is to support international characters like 𠀋, then split up the string using the System.Globalization.StringInfo class. Using StringInfo, you can split up the string based on number of text elements.

string internationalString = '𠀋';

The above string has a Length of 2, because the String.Length property returns the number of Char objects in this instance, not the number of Unicode characters.


Best , Easiest and Generic Answer :).

    string originalString = "1111222233334444";
    List<string> test = new List<string>();
    int chunkSize = 4; // change 4 with the size of strings you want.
    for (int i = 0; i < originalString.Length; i = i + chunkSize)
    {
        if (originalString.Length - i >= chunkSize)
            test.Add(originalString.Substring(i, chunkSize));
        else
            test.Add(originalString.Substring(i,((originalString.Length - i))));
    }

Changed slightly to return parts whose size not equal to chunkSize

public static IEnumerable<string> Split(this string str, int chunkSize)
    {
        var splits = new List<string>();
        if (str.Length < chunkSize) { chunkSize = str.Length; }
        splits.AddRange(Enumerable.Range(0, str.Length / chunkSize).Select(i => str.Substring(i * chunkSize, chunkSize)));
        splits.Add(str.Length % chunkSize > 0 ? str.Substring((str.Length / chunkSize) * chunkSize, str.Length - ((str.Length / chunkSize) * chunkSize)) : string.Empty);
        return (IEnumerable<string>)splits;
    }

I can't remember who gave me this, but it works great. I speed tested a number of ways to break Enumerable types into groups. The usage would just be like this...

List<string> Divided = Source3.Chunk(24).Select(Piece => string.Concat<char>(Piece)).ToList();

The extention code would look like this...

#region Chunk Logic
private class ChunkedEnumerable<T> : IEnumerable<T>
{
    class ChildEnumerator : IEnumerator<T>
    {
        ChunkedEnumerable<T> parent;
        int position;
        bool done = false;
        T current;


        public ChildEnumerator(ChunkedEnumerable<T> parent)
        {
            this.parent = parent;
            position = -1;
            parent.wrapper.AddRef();
        }

        public T Current
        {
            get
            {
                if (position == -1 || done)
                {
                    throw new InvalidOperationException();
                }
                return current;

            }
        }

        public void Dispose()
        {
            if (!done)
            {
                done = true;
                parent.wrapper.RemoveRef();
            }
        }

        object System.Collections.IEnumerator.Current
        {
            get { return Current; }
        }

        public bool MoveNext()
        {
            position++;

            if (position + 1 > parent.chunkSize)
            {
                done = true;
            }

            if (!done)
            {
                done = !parent.wrapper.Get(position + parent.start, out current);
            }

            return !done;

        }

        public void Reset()
        {
            // per http://msdn.microsoft.com/en-us/library/system.collections.ienumerator.reset.aspx
            throw new NotSupportedException();
        }
    }

    EnumeratorWrapper<T> wrapper;
    int chunkSize;
    int start;

    public ChunkedEnumerable(EnumeratorWrapper<T> wrapper, int chunkSize, int start)
    {
        this.wrapper = wrapper;
        this.chunkSize = chunkSize;
        this.start = start;
    }

    public IEnumerator<T> GetEnumerator()
    {
        return new ChildEnumerator(this);
    }

    System.Collections.IEnumerator System.Collections.IEnumerable.GetEnumerator()
    {
        return GetEnumerator();
    }

}
private class EnumeratorWrapper<T>
{
    public EnumeratorWrapper(IEnumerable<T> source)
    {
        SourceEumerable = source;
    }
    IEnumerable<T> SourceEumerable { get; set; }

    Enumeration currentEnumeration;

    class Enumeration
    {
        public IEnumerator<T> Source { get; set; }
        public int Position { get; set; }
        public bool AtEnd { get; set; }
    }

    public bool Get(int pos, out T item)
    {

        if (currentEnumeration != null && currentEnumeration.Position > pos)
        {
            currentEnumeration.Source.Dispose();
            currentEnumeration = null;
        }

        if (currentEnumeration == null)
        {
            currentEnumeration = new Enumeration { Position = -1, Source = SourceEumerable.GetEnumerator(), AtEnd = false };
        }

        item = default(T);
        if (currentEnumeration.AtEnd)
        {
            return false;
        }

        while (currentEnumeration.Position < pos)
        {
            currentEnumeration.AtEnd = !currentEnumeration.Source.MoveNext();
            currentEnumeration.Position++;

            if (currentEnumeration.AtEnd)
            {
                return false;
            }

        }

        item = currentEnumeration.Source.Current;

        return true;
    }

    int refs = 0;

    // needed for dispose semantics 
    public void AddRef()
    {
        refs++;
    }

    public void RemoveRef()
    {
        refs--;
        if (refs == 0 && currentEnumeration != null)
        {
            var copy = currentEnumeration;
            currentEnumeration = null;
            copy.Source.Dispose();
        }
    }
}
/// <summary>Speed Checked.  Works Great!</summary>
public static IEnumerable<IEnumerable<T>> Chunk<T>(this IEnumerable<T> source, int chunksize)
{
    if (chunksize < 1) throw new InvalidOperationException();

    var wrapper = new EnumeratorWrapper<T>(source);

    int currentPos = 0;
    T ignore;
    try
    {
        wrapper.AddRef();
        while (wrapper.Get(currentPos, out ignore))
        {
            yield return new ChunkedEnumerable<T>(wrapper, chunksize, currentPos);
            currentPos += chunksize;
        }
    }
    finally
    {
        wrapper.RemoveRef();
    }
}
#endregion

I recently had to write something that accomplishes this at work, so I thought I would post my solution to this problem. As an added bonus, the functionality of this solution provides a way to split the string in the opposite direction and it does correctly handle unicode characters as previously mentioned by Marvin Pinto above. So, here it is:

using System;
using Extensions;

namespace TestCSharp
{
    class Program
    {
        static void Main(string[] args)
        {    
            string asciiStr = "This is a string.";
            string unicodeStr = "これは文字列です。";

            string[] array1 = asciiStr.Split(4);
            string[] array2 = asciiStr.Split(-4);

            string[] array3 = asciiStr.Split(7);
            string[] array4 = asciiStr.Split(-7);

            string[] array5 = unicodeStr.Split(5);
            string[] array6 = unicodeStr.Split(-5);
        }
    }
}

namespace Extensions
{
    public static class StringExtensions
    {
        /// <summary>Returns a string array that contains the substrings in this string that are seperated a given fixed length.</summary>
        /// <param name="s">This string object.</param>
        /// <param name="length">Size of each substring.
        ///     <para>CASE: length &gt; 0 , RESULT: String is split from left to right.</para>
        ///     <para>CASE: length == 0 , RESULT: String is returned as the only entry in the array.</para>
        ///     <para>CASE: length &lt; 0 , RESULT: String is split from right to left.</para>
        /// </param>
        /// <returns>String array that has been split into substrings of equal length.</returns>
        /// <example>
        ///     <code>
        ///         string s = "1234567890";
        ///         string[] a = s.Split(4); // a == { "1234", "5678", "90" }
        ///     </code>
        /// </example>            
        public static string[] Split(this string s, int length)
        {
            System.Globalization.StringInfo str = new System.Globalization.StringInfo(s);

            int lengthAbs = Math.Abs(length);

            if (str == null || str.LengthInTextElements == 0 || lengthAbs == 0 || str.LengthInTextElements <= lengthAbs)
                return new string[] { str.ToString() };

            string[] array = new string[(str.LengthInTextElements % lengthAbs == 0 ? str.LengthInTextElements / lengthAbs: (str.LengthInTextElements / lengthAbs) + 1)];

            if (length > 0)
                for (int iStr = 0, iArray = 0; iStr < str.LengthInTextElements && iArray < array.Length; iStr += lengthAbs, iArray++)
                    array[iArray] = str.SubstringByTextElements(iStr, (str.LengthInTextElements - iStr < lengthAbs ? str.LengthInTextElements - iStr : lengthAbs));
            else // if (length < 0)
                for (int iStr = str.LengthInTextElements - 1, iArray = array.Length - 1; iStr >= 0 && iArray >= 0; iStr -= lengthAbs, iArray--)
                    array[iArray] = str.SubstringByTextElements((iStr - lengthAbs < 0 ? 0 : iStr - lengthAbs + 1), (iStr - lengthAbs < 0 ? iStr + 1 : lengthAbs));

            return array;
        }
    }
}

Also, here is an image link to the results of running this code: http://i.imgur.com/16Iih.png


I've slightly build up on João's solution. What I've done differently is in my method you can actually specify whether you want to return the array with remaining characters or whether you want to truncate them if the end characters do not match your required chunk length, I think it's pretty flexible and the code is fairly straight forward:

using System;
using System.Linq;
using System.Text.RegularExpressions;

namespace SplitFunction
{
    class Program
    {
        static void Main(string[] args)
        {
            string text = "hello, how are you doing today?";
            string[] chunks = SplitIntoChunks(text, 3,false);
            if (chunks != null)
            {
                chunks.ToList().ForEach(e => Console.WriteLine(e));
            }

            Console.ReadKey();
        }

        private static string[] SplitIntoChunks(string text, int chunkSize, bool truncateRemaining)
        {
            string chunk = chunkSize.ToString(); 
            string pattern = truncateRemaining ? ".{" + chunk + "}" : ".{1," + chunk + "}";

            string[] chunks = null;
            if (chunkSize > 0 && !String.IsNullOrEmpty(text))
                chunks = (from Match m in Regex.Matches(text,pattern)select m.Value).ToArray(); 

            return chunks;
        }     
    }
}

If necessary to split by few different length: For example you have date and time in specify format stringstrangeStr = "07092016090532"; 07092016090532 (Date:07.09.2016 Time: 09:05:32)

public static IEnumerable<string> SplitBy(this string str, int[] chunkLength)
    {
        if (String.IsNullOrEmpty(str)) throw new ArgumentException();
        int i = 0;
        for (int j = 0; j < chunkLength.Length; j++)
        {
            if (chunkLength[j] < 1) throw new ArgumentException();
            if (chunkLength[j] + i > str.Length)
            {
                chunkLength[j] = str.Length - i;
            }
            yield return str.Substring(i, chunkLength[j]);
            i += chunkLength[j];
        }
    }

using:

string[] dt = strangeStr.SplitBy(new int[] { 2, 2, 4, 2, 2, 2, 2 }).ToArray();

It's not pretty and it's not fast, but it works, it's a one-liner and it's LINQy:

List<string> a = text.Select((c, i) => new { Char = c, Index = i }).GroupBy(o => o.Index / 4).Select(g => new String(g.Select(o => o.Char).ToArray())).ToList();

Modified (now it accepts any non null string and any positive chunkSize) Konstantin Spirin's solution:

public static IEnumerable<String> Split(String value, int chunkSize) {
  if (null == value)
    throw new ArgumentNullException("value");
  else if (chunkSize <= 0)
    throw new ArgumentOutOfRangeException("chunkSize", "Chunk size should be positive");

  return Enumerable
    .Range(0, value.Length / chunkSize + ((value.Length % chunkSize) == 0 ? 0 : 1))
    .Select(index => (index + 1) * chunkSize < value.Length 
      ? value.Substring(index * chunkSize, chunkSize)
      : value.Substring(index * chunkSize));
}

Tests:

  String source = @"ABCDEF";

  // "ABCD,EF"
  String test1 = String.Join(",", Split(source, 4));
  // "AB,CD,EF"
  String test2 = String.Join(",", Split(source, 2));
  // "ABCDEF"
  String test3 = String.Join(",", Split(source, 123));

Simple and short:

// this means match a space or not a space (anything) up to 4 characters
var lines = Regex.Matches(str, @"[\s\S]{0,4}").Cast<Match>().Select(x => x.Value);

This can be done in this way too

        string actualString = "1111222233334444";
        var listResult = new List<string>();
        int groupingLength = actualString.Length % 4;
        if (groupingLength > 0)
            listResult.Add(actualString.Substring(0, groupingLength));
        for (int i = groupingLength; i < actualString.Length; i += 4)
        {
            listResult.Add(actualString.Substring(i, 4));
        }

        foreach(var res in listResult)
        {
            Console.WriteLine(res);
        }
        Console.Read();

This should be much faster and more efficient than using LINQ or other approaches used here.

public static IEnumerable<string> Splice(this string s, int spliceLength)
{
    if (s == null)
        throw new ArgumentNullException("s");
    if (spliceLength < 1)
        throw new ArgumentOutOfRangeException("spliceLength");

    if (s.Length == 0)
        yield break;
    var start = 0;
    for (var end = spliceLength; end < s.Length; end += spliceLength)
    {
        yield return s.Substring(start, spliceLength);
        start = end;
    }
    yield return s.Substring(start);
}

Using regular expressions and Linq:

List<string> groups = (from Match m in Regex.Matches(str, @"\d{4}")
                       select m.Value).ToList();

I find this to be more readable, but it's just a personal opinion. It can also be a one-liner : ).


Why not loops? Here's something that would do it quite well:

        string str = "111122223333444455";
        int chunkSize = 4;
        int stringLength = str.Length;
        for (int i = 0; i < stringLength ; i += chunkSize)
        {
            if (i + chunkSize > stringLength) chunkSize = stringLength  - i;
            Console.WriteLine(str.Substring(i, chunkSize));

        }
        Console.ReadLine();

I don't know how you'd deal with case where the string is not factor of 4, but not saying you're idea is not possible, just wondering the motivation for it if a simple for loop does it very well? Obviously the above could be cleaned and even put in as an extension method.

Or as mentioned in comments, you know it's /4 then

str = "1111222233334444";
for (int i = 0; i < stringLength; i += chunkSize) 
  {Console.WriteLine(str.Substring(i, chunkSize));} 

You can use morelinq by Jon Skeet. Use Batch like:

string str = "1111222233334444";
int chunkSize = 4;
var chunks = str.Batch(chunkSize).Select(r => new String(r.ToArray()));

This will return 4 chunks for the string "1111222233334444". If the string length is less than or equal to the chunk size Batch will return the string as the only element of IEnumerable<string>

For output:

foreach (var chunk in chunks)
{
    Console.WriteLine(chunk);
}

and it will give:

1111
2222
3333
4444

List<string> SplitString(int chunk, string input)
{
    List<string> list = new List<string>();
    int cycles = input.Length / chunk;

    if (input.Length % chunk != 0)
        cycles++;

    for (int i = 0; i < cycles; i++)
    {
        try
        {
            list.Add(input.Substring(i * chunk, chunk));
        }
        catch
        {
            list.Add(input.Substring(i * chunk));
        }
    }
    return list;
}

class StringHelper
{
    static void Main(string[] args)
    {
        string str = "Hi my name is vikas bansal and my email id is [email protected]";
        int offSet = 10;

        List<string> chunks = chunkMyStr(str, offSet);

        Console.Read();
    }

    static List<string> chunkMyStr(string str, int offSet)
    {


        List<string> resultChunks = new List<string>();

        for (int i = 0; i < str.Length; i += offSet)
        {
            string temp = str.Substring(i, (str.Length - i) > offSet ? offSet : (str.Length - i));
            Console.WriteLine(temp);
            resultChunks.Add(temp);


        }

        return resultChunks;
    }
}

static IEnumerable<string> Split(string str, double chunkSize)
{
    return Enumerable.Range(0, (int) Math.Ceiling(str.Length/chunkSize))
       .Select(i => new string(str
           .Skip(i * (int)chunkSize)
           .Take((int)chunkSize)
           .ToArray()));
}

and another approach:

using System;
using System.Collections.Generic;
using System.Linq;

public class Program
{
    public static void Main()
    {

        var x = "Hello World";
        foreach(var i in x.ChunkString(2)) Console.WriteLine(i);
    }
}

public static class Ext{
    public static IEnumerable<string> ChunkString(this string val, int chunkSize){
        return val.Select((x,i) => new {Index = i, Value = x})
                  .GroupBy(x => x.Index/chunkSize, x => x.Value)
                  .Select(x => string.Join("",x));
    }
}

static IEnumerable<string> Split(string str, int chunkSize)
{
    return Enumerable.Range(0, str.Length / chunkSize)
        .Select(i => str.Substring(i * chunkSize, chunkSize));
}

Please note that additional code might be required to gracefully handle edge cases (null or empty input string, chunkSize == 0, input string length not divisible by chunkSize, etc.). The original question doesn't specify any requirements for these edge cases and in real life the requirements might vary so they are out of scope of this answer.





string