# python字符串查找第二个 - python返回字符位置

## 在字符串中查找第n次出现的子字符串 (12)

`mystring.find("substring", 2nd)`

``````def find_nth(haystack, needle, n):
start = haystack.find(needle)
while start >= 0 and n > 1:
start = haystack.find(needle, start+len(needle))
n -= 1
return start
``````

``````>>> find_nth("foofoofoofoo", "foofoo", 2)
6
``````

``````def find_nth_overlapping(haystack, needle, n):
start = haystack.find(needle)
while start >= 0 and n > 1:
start = haystack.find(needle, start+1)
n -= 1
return start
``````

``````>>> find_nth_overlapping("foofoofoofoo", "foofoo", 2)
3
``````

1. 简单胜于复杂。
2. Flat比嵌套更好。
3. 可读性计数。

``````>>> import re
>>> s = "ababdfegtduab"
>>> [m.start() for m in re.finditer(r"ab",s)]
[0, 2, 11]
>>> [m.start() for m in re.finditer(r"ab",s)][2] #index 2 is third occurrence
11
``````

``````c = os.getcwd().split('\\')
print '\\'.join(c[0:-2])
``````

``````def find_nth(s, x, n):
i = -1
for _ in range(n):
i = s.find(x, i + len(x))
if i == -1:
break
return i

print find_nth('bananabanana', 'an', 3)
``````

``````def find_nth(s, x, n, i = 0):
i = s.find(x, i)
if n == 1 or i == -1:
return i
else:
return find_nth(s, x, n - 1, i + len(x))

print find_nth('bananabanana', 'an', 3)
``````

``````def findnth(haystack, needle, n):
parts= haystack.split(needle, n+1)
if len(parts)<=n+1:
return -1
return len(haystack)-len(parts[-1])-len(needle)

def find_nth(s, x, n=0, overlap=False):
l = 1 if overlap else len(x)
i = -l
for c in xrange(n + 1):
i = s.find(x, i + l)
if i < 0:
break
return i
``````

``````In [1]: import _find_nth, find_nth, mmap

In [2]: f = open('bigfile', 'r')

In [3]: mm = mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ)
``````

`findnth()`已经存在第一个问题，因为`mmap.mmap`对象不支持`split()` 。 所以我们实际上必须将整个文件复制到内存中：

``````In [4]: %time s = mm[:]
CPU times: user 813 ms, sys: 3.25 s, total: 4.06 s
Wall time: 17.7 s
``````

``````In [5]: %timeit find_nth.findnth(s, '\n', 1000000)
1 loops, best of 3: 29.9 s per loop
``````

``````In [6]: %timeit find_nth.find_nth(s, '\n', 1000000)
1 loops, best of 3: 774 ms per loop
``````

``````In [7]: %timeit find_nth.find_nth(mm, '\n', 1000000)
1 loops, best of 3: 1.21 s per loop
``````

`mm``s`似乎有一个小的性能损失，但这说明`find_nth()`可以在1.2秒内得到答案，而`findnth`的总计为47秒。

``````#include <Python.h>
#include <string.h>

off_t _find_nth(const char *buf, size_t l, char c, int n) {
off_t i;
for (i = 0; i < l; ++i) {
if (buf[i] == c && n-- == 0) {
return i;
}
}
return -1;
}

off_t _find_nth2(const char *buf, size_t l, char c, int n) {
const char *b = buf - 1;
do {
b = memchr(b + 1, c, l);
if (!b) return -1;
} while (n--);
return b - buf;
}

/* mmap_object is private in mmapmodule.c - replicate beginning here */
typedef struct {
char *data;
size_t size;
} mmap_object;

typedef struct {
const char *s;
size_t l;
char c;
int n;
} params;

int parse_args(PyObject *args, params *P) {
PyObject *obj;
const char *x;

if (!PyArg_ParseTuple(args, "Osi", &obj, &x, &P->n)) {
return 1;
}
PyTypeObject *type = Py_TYPE(obj);

if (type == &PyString_Type) {
P->s = PyString_AS_STRING(obj);
P->l = PyString_GET_SIZE(obj);
} else if (!strcmp(type->tp_name, "mmap.mmap")) {
mmap_object *m_obj = (mmap_object*) obj;
P->s = m_obj->data;
P->l = m_obj->size;
} else {
PyErr_SetString(PyExc_TypeError, "Cannot obtain char * from argument 0");
return 1;
}
P->c = x[0];
return 0;
}

static PyObject* py_find_nth(PyObject *self, PyObject *args) {
params P;
if (!parse_args(args, &P)) {
return Py_BuildValue("i", _find_nth(P.s, P.l, P.c, P.n));
} else {
return NULL;
}
}

static PyObject* py_find_nth2(PyObject *self, PyObject *args) {
params P;
if (!parse_args(args, &P)) {
return Py_BuildValue("i", _find_nth2(P.s, P.l, P.c, P.n));
} else {
return NULL;
}
}

static PyMethodDef methods[] = {
{"find_nth", py_find_nth, METH_VARARGS, ""},
{"find_nth2", py_find_nth2, METH_VARARGS, ""},
{0}
};

PyMODINIT_FUNC init_find_nth(void) {
Py_InitModule("_find_nth", methods);
}
``````

``````from distutils.core import setup, Extension
module = Extension('_find_nth', sources=['_find_nthmodule.c'])
setup(ext_modules=[module])
``````

``````In [8]: %timeit _find_nth.find_nth(mm, '\n', 1000000)
1 loops, best of 3: 218 ms per loop

In [9]: %timeit _find_nth.find_nth(s, '\n', 1000000)
1 loops, best of 3: 216 ms per loop

In [10]: %timeit _find_nth.find_nth2(mm, '\n', 1000000)
1 loops, best of 3: 307 ms per loop

In [11]: %timeit _find_nth.find_nth2(s, '\n', 1000000)
1 loops, best of 3: 304 ms per loop
``````

``````len("substring".join([s for s in ori.split("substring")[:2]]))
``````

``````text = "This is a test from a test ok"

firstTest = text.find('test')

print text.find('test', firstTest + 1)
``````

``````import re
indices = [s.start() for s in re.finditer(':', yourstring)]
``````

``````n = 2
nth_entry = indices[n-1]
``````

``````num_instances = len(indices)
``````

``````def Find(String,ToFind,Occurence = 1):
index = 0
count = 0
while index <= len(String):
try:
if String[index:index + len(ToFind)] == ToFind:
count += 1
if count == Occurence:
return index
break
index += 1
except IndexError:
return False
break
return False
``````

``````import itertools
import re

def find_nth(haystack, needle, n = 1):
"""
Find the starting index of the nth occurrence of ``needle`` in \
``haystack``.

If ``needle`` is a ``str``, this will perform an exact substring
match; if it is a ``RegexpObject``, this will perform a regex
search.

If ``needle`` doesn't appear in ``haystack``, return ``-1``. If
``needle`` doesn't appear in ``haystack`` ``n`` times,
return ``-1``.

Arguments
---------
* ``needle`` the substring (or a ``RegexpObject``) to find
* ``haystack`` is a ``str``
* an ``int`` indicating which occurrence to find; defaults to ``1``

>>> find_nth("foo", "o", 1)
1
>>> find_nth("foo", "o", 2)
2
>>> find_nth("foo", "o", 3)
-1
>>> find_nth("foo", "b")
-1
>>> import re
>>> either_o = re.compile("[oO]")
>>> find_nth("foo", either_o, 1)
1
>>> find_nth("FOO", either_o, 1)
1
"""
if (hasattr(needle, 'finditer')):
matches = needle.finditer(haystack)
else:
matches = re.finditer(re.escape(needle), haystack)
start_here = itertools.dropwhile(lambda x: x[0] < n, enumerate(matches, 1))
try:
return next(start_here)[1].start()
except StopIteration:
return -1
``````

``````# return -1 if nth substr (0-indexed) d.n.e, else return index
def find_nth(s, substr, n):
i = 0
while n >= 0:
n -= 1
i = s.find(substr, i + 1)
return i
``````

``````>>> s="abcdefabcdefababcdef"
>>> j=0
>>> for n,i in enumerate(s):
...   if s[n:n+2] =="ab":
...     print n,i
...     j=j+1
...     if j==2: print "2nd occurence at index position: ",n
...
0 a
6 a
2nd occurence at index position:  6
12 a
14 a
``````