# Help me solving this String based problem from amazon interview

• Given a paragraph of text, write a program to find the first shortest sub-segment that contains each of the given k words at least once. A segment is said to be shorter than other if it contains less number of words.

Ignore characters other than [a-z][A-Z] in the text. Comparison between the strings should be case-insensitive.

If no sub-segment is found then the program should output “NO SUBSEGMENT FOUND”.

Input format :

First line of the input contains the text.
Next line contains k , the number of words given to be searched.
Each of the next k lines contains a word.

Output format :

Print first shortest sub-segment that contains given k words , ignore special characters, numbers.If no sub-segment is found it should return “NO SUBSEGMENT FOUND”

Sample Input :

This is a test. This is a programming test. This is a programming test in any language.
4
this
a
test
programming

Sample Output :

a programming test This

Explanation :
In this test case segment "a programming test. This" contains given four words. You have to print without special characters, numbers so output is "a programming test This". Another segment "This is a programming test." also contains given four words but have more number of words.

Constraint :

Total number of character in a paragraph will not be more than 200,000.
0 < k <= no. of words in paragraph.
0 < Each word length < 15

• A python solution using moving window. Needs further optimization.

def scan(para, keys):
words = para.lower().replace('.', '').split()
keys_pos = {x: [] for x in keys}
win_begin = 0
win_end = 0
min_seg = (-1, len(words))

for i, word in enumerate(words):
if word not in keys_pos:
continue
win_end = i

pos = keys_pos[word]
pos.append(i)
if len(pos) > 1 and pos[0] == win_begin:
del pos[0]
win_begin = min(x[0] for x in keys_pos.values() if len(x) > 0)

if all(len(x) > 0 for x in keys_pos.values()):
valid_seg = (win_begin, win_end)
if valid_seg[1] - valid_seg[0] < min_seg[1] - min_seg[0]:
min_seg = valid_seg

if min_seg[0] == -1:
return None
return ' '.join(words[min_seg[0]:min_seg[1]+1])

if __name__ == '__main__':
para = "This is a test. This is a programming test. This is a programming test in any language."
keys = ["this", "a", "test", "programming"]
print(scan(para, keys))

• I used a window that increased until all the words were found. After this I tried to shrink the window from start while having all the words at least once

#include <string>
#include <cstring>
#include <iostream>
#include <vector>
#include <queue>
#include <map>
#include <algorithm>
#include <queue>
#include <climits>

using namespace std;
#define SIZE 200000

struct cmp_str
{
bool operator()(char const *a, char const *b)
{
return std::strcmp(a, b) < 0;
}
};

int updateHash(int zeros, map <char *, int, cmp_str> &m, char *key, int increase) {

if (increase){
m[key]++;
if (m[key] == 1)
zeros--;
} else {
m[key]--;
if (m[key] == 0){
zeros++;
}
}
return zeros;
}

void solve(string sentence, string words) {

std::transform(sentence.begin(),sentence.end(),sentence.begin(), ::tolower);

char str[SIZE], str2[SIZE];
char *token;
//initial string split
vector <char*> split;
//words hash
map <char*,int, cmp_str> words_map;

int zeros = 0, dist = INT_MAX, start_i , start_j;

strcpy(str,sentence.c_str());
token = strtok(str, ". ");

while(token != NULL) {
split.push_back(token);
token = strtok(NULL, ". ");
}

strcpy(str2,words.c_str());
token = strtok(str2, ". ");

while (token != NULL){
zeros++;
words_map[token] = 0;
token = strtok(NULL,". ");
}

int start = 0, end = 0, n = split.size();

/*main stuff here*/

while (end < n) {
while(end < n && zeros > 0){
if(words_map.find(split[end]) == words_map.end()){
} else {
zeros = updateHash(zeros,words_map, split[end],1);
}
end ++;
}
dist = min(dist,(end-start));
while (zeros == 0 && start < end) {
if (words_map.find(split[start]) != words_map.end()){
zeros = updateHash(zeros,words_map, split[start],0);
}
start++;
if (dist > min(dist,(end-start))){
dist = end - start;
start_i = start;
start_j = end;
}
}
end++;
}
if (start == 0 && end == n && zeros > 0) {
}
for (int k = start_i; k < start_j; k++){
cout << split[k] << endl;
}
}

int main() {
std::string sentence("This is a test. This is a programming test. This is a programming test in any language.");
std::string words = ("this a test programming");
solve(sentence,words);

return 0;
}

• import java.io.*;
import java.util.*;

class myCode
{
public static void main (String[] args) throws java.lang.Exception
{
HashMap<String, Integer> hm = new HashMap<String, Integer>();
hm.put("this",0);
hm.put("a",0);
hm.put("test",0);
hm.put("programming",0);
String input = "This is a test. This is a programming test. This is a programming test in any language.";
String newStr = input.replaceAll("\\.","");
String words[] = newStr.toLowerCase().split(" ");
int start = 0;
int count = 0;
int window = 0;
int min = window;
for(int i=0;i<words.length;i++){
System.out.println("Value of i is "+i);
count = 0;
if(hm.containsKey(words[i]))
{
hm.put(words[i],hm.get(words[i])+1);
}
else{
window++;
}
for(String key : hm.keySet()){
if(hm.get(key) >=1){
count++;
}
else
break;

}
if(count == 4){
if(min>window){
for(int j=start;j<i+1;j++){
System.out.print(words[j]+" ");
}
min = window;
}
if(window == 0)
{
break;
}
System.out.println();
for(String key : hm.keySet()){
hm.put(key,0);
}
start = start+window;
System.out.println("Start is "+start+" and window is "+window);
i = start-1;
min = window;
window = 0;
}

}

}
}

• import java.io.*;
import java.util.*;

class myCode
{
public static void main (String[] args) throws java.lang.Exception
{
HashMap<String, Integer> hm = new HashMap<String, Integer>();
hm.put("this",0);
hm.put("a",0);
hm.put("test",0);
hm.put("programming",0);
String input = "This is a test. This is a programming test. This is a programming test in any language.";
String newStr = input.replaceAll("\\.","");
String words[] = newStr.toLowerCase().split(" ");
int start = 0;
int count = 0;
int window = 0;
int min = window;
for(int i=0;i<words.length;i++){
System.out.println("Value of i is "+i);
count = 0;
if(hm.containsKey(words[i]))
{
hm.put(words[i],hm.get(words[i])+1);
}
else{
window++;
}
for(String key : hm.keySet()){
if(hm.get(key) >=1){
count++;
}
else
break;

}
if(count == 4){
if(min>window){
for(int j=start;j<i+1;j++){
System.out.print(words[j]+" ");
}
min = window;
}
if(window == 0)
{
break;
}
System.out.println();
for(String key : hm.keySet()){
hm.put(key,0);
}
start = start+window;
System.out.println("Start is "+start+" and window is "+window);
i = start-1;
min = window;
window = 0;
}

}

}
}

I have to optimize this java code a bit further. Feedback for this code is appreciated.

• This post is deleted!

• get this done by C#

using System;
using System.Linq;
using System.Collections.Generic;
using System.Text.RegularExpressions;

namespace FindShortestSegmentContainingAllGivenWords
{
class Program
{
static void Main(string[] args)
{
Console.WriteLine("Find the shortest segment containing all given words");
var para = "This is a test. This is a programming test. This is a programming test in any language.";
var searches = new string[] { "this", "a", "test", "programming" };
var words = RefinePara(para.ToLower()).Split(' ');
Console.WriteLine(\$"Final segment: {SolvePara(new List<string>(words), new List<string>(searches))}");

}

private static string SolvePara(List<string> words, List<string> searches)
{
var min = 0;
var max = 0;
var bingo = words;
var found = false;
for (int i = 0; i < words.Count; i++)
{
var tempSearch = new List<string>(searches).Select(t => t.ToLower()).ToList();
if (tempSearch.Contains(words[i], StringComparer.OrdinalIgnoreCase))
{
min = i;
max = min;
tempSearch.Remove(words[i]);
for (int j = min + 1; j < words.Count; j++)
{
if (tempSearch.Contains(words[j], StringComparer.OrdinalIgnoreCase))
{
max = j;
tempSearch.Remove(words[j]);
if (tempSearch.Count == 0)
{
if (max != min && max - min + 1 < bingo.Count)
{
bingo = words.GetRange(min, max - min + 1);
found = true;
}
break;
}
}
}
}
}
return !found ? "no segment found" : string.Join(" ", bingo);
}

private static string RefinePara(string para)
{
return Regex.Replace(para, @"[;,.\t\r]|[\n]{2}", "");
}
}
}

• this
a
test
programming

Based on sliding window logic -

import java.util.*;
import java.io.*;

public class HelloWorld {
public static void main(String[] args) {

String input = "This is a test. This is a programming test. This is a programming test in any language.";

String[] arr = { "this", "a", "test", "programming", "this"};

HelloWorld soln = new HelloWorld();
System.out.println(soln.shortestSubSegment(input, arr));
}

public String shortestSubSegment(String input, String[] arr){
input = input.toLowerCase();
input = input.replaceAll("\\.", "");
Map<String, Integer> inputMap = new HashMap<>();

Map<String, Integer> currentMap = new HashMap<>();

for(String str:arr){
if(!inputMap.containsKey(str)){
inputMap.put(str, 1);
} else{
inputMap.put(str, inputMap.get(str)+1);
}
}

String[] inputArr = input.split(" ");

int len = arr.length;
int count = 0;
int start =0;
int minStart =0, minEnd =inputArr.length;

for(int i=0; i < inputArr.length; i++){

if(!inputMap.containsKey(inputArr[i])){
continue;
}

if(currentMap.getOrDefault(inputArr[i], 0) < inputMap.get(inputArr[i])){

currentMap.put(inputArr[i], currentMap.getOrDefault(inputArr[i], 0)+1);
count++;
} else{
currentMap.put(inputArr[i], currentMap.get(inputArr[i])+1);
}

// window
if(count == len){

while(!inputMap.containsKey(inputArr[start]) ||
inputMap.get(inputArr[start]) < currentMap.getOrDefault(inputArr[start], 0)){
if(currentMap.containsKey(inputArr[start])){
currentMap.put(inputArr[start], currentMap.get(inputArr[start])-1);
}

start++;
}

if((minEnd - minStart) > (i-start)){
minStart = start;
minEnd = i;
}

}

}

if(len != count) {
return "NO SUBSEGMENT FOUND";
}

String out ="";

for(int i= minStart; i<= minEnd; i++){
out+= inputArr[i]+" ";
}
return out;
}
}

Looks like your connection to LeetCode Discuss was lost, please wait while we try to reconnect.