The Normal DP way to solve this question in Python is TLE, but I can run it using Java and the running time is almost 400ms. Also I can use static variable in Python which can solve this question in almost 300ms, but when I use static variable in Java, It is even much faster than 200ms

My double direction BFS way:

```
class Solution(object):
def numSquares(self, n):
"""
:type n: int
:rtype: int
"""
import math
head =[0]
tail =[n]
level = 1
head_square = set()
tail_square = set()
head_square.add(0)
tail_square.add(n)
while True:
if head <= tail:
next = []
while head:
temp = head[0]
del head[0]
for i in xrange(1,int(math.sqrt(n)) + 1):
if i*i + temp in tail_square:
return level
else:
if temp+i*i not in head_square:
head_square.add(temp + i*i)
next.append(temp + i*i)
head = next
level +=1
else:
next = []
while tail:
temp = tail[0]
del tail[0]
for i in xrange(1,int(math.sqrt(n)) + 1):
if temp - i*i in head_square:
return level
else:
if temp - i*i not in tail_square:
tail_square.add(temp - i*i)
next.append(temp - i*i)
head = next
level += 1
```

My BFS way:

```
class Solution(object):
def numSquares(self, n):
"""
:type n: int
:rtype: int
"""
import math
q =[n]
level = 1
visited = [False] * (n+1)
while True:
next = []
while q:
temp = q[0]
del q[0]
for i in xrange(1,int(math.sqrt(temp)) + 1):
if i*i == temp:
return level
else:
if visited[temp - i*i] == True or temp - i*i < 0:
continue
else:
next.append(temp - i*i)
visited[temp-i*i] = True
q = next
level +=1
```

static DP way(Thanks for StefanPochmann)

```
class Solution(object):
_f = [0]
def numSquares(self, n):
"""
:type n: int
:rtype: int
"""
f = self._f
while len(f) <= n:
temp = f[-1]
j = 1
while j*j <= len(f):
temp = min(temp,f[-j*j])
j += 1
f.append(temp + 1)
return f[n]
```

My normal DP way(TLE!!!):

```
class Solution(object):
def numSquares(self, n):
"""
:type n: int
:rtype: int
"""
f=[0]* (n + 1)
m = sys.maxint
for i in range(n+1):
j = 0
while j*j <= i:
m = min(m,f[i-j*j] + 1)
j += 1
f[i] = m
return f[n]
```