Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -395,7 +395,6 @@ private String buildBatchPrompt(List<EmailBatchItem> items) {
- help
- privacy
- settings
- account management

If no job-related link exists:

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,7 @@ public void connectAndSetupPush(String authCode, String email) throws Exception

userRepository.saveAndFlush(user);

log.info("Gmail Automation enabled with 1 DB transaction for: {}", user.getEmail());
log.info("User {} successfully connected Gmail. Watch set with label ID: {}", email, labelId);
}

@Async("taskExecutor")
Expand Down Expand Up @@ -151,8 +151,6 @@ public void initiateManualSync(String email) {
String currentHistoryId = service.users().getProfile("me").execute().getHistoryId().toString();

jobService.finalizeManualSync(email, currentHistoryId);

log.info("Manual sync finished for {}. Found {} jobs.", email, found);
} catch (Exception e) {
log.error("Manual sync failed for {}: {}", email, e.getMessage());
} finally {
Expand Down Expand Up @@ -340,8 +338,6 @@ public void disconnectGmail(String email) {
userRepository.saveAndFlush(user);

cleanupGoogleResourcesAsync(refreshToken, labelId);

log.info("User {} disconnected from Gmail. Local state cleared.", email);
}

@Async("taskExecutor")
Expand All @@ -358,7 +354,6 @@ protected void cleanupGoogleResourcesAsync(String refreshToken, String labelId)
.uri("https://oauth2.googleapis.com/revoke?token=" + refreshToken)
.retrieve();

log.info("Google resources cleaned up and token revoked.");
} catch (Exception e) {
log.warn("Non-critical: Google resource cleanup failed: {}", e.getMessage());
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@
import java.util.ArrayList;
import java.util.Base64;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

@Service
@Slf4j
Expand All @@ -50,19 +52,14 @@ public GmailWebhookService(GeminiService geminiService, JobService jobService, U
@Async("taskExecutor")
public void processHistorySync(String userEmail) {
final String email = userEmail.toLowerCase();

LocalDateTime now = LocalDateTime.now();
LocalDateTime expiryThreshold = now.minusMinutes(15);

int updatedRows = userRepository.claimSyncLock(email, now, expiryThreshold);
if (updatedRows == 0) return;

cacheEvictService.evictAllForUser(email);
if (userRepository.claimSyncLock(email, now, expiryThreshold) == 0) return;

try {
User user = userRepository.findByEmail(email)
.orElseThrow(() -> new RuntimeException("User not found after lock"));

User user = userRepository.findByEmail(email).orElseThrow();
if (user.getGmailRefreshToken() == null) return;

String accessToken = getFreshAccessToken(user.getGmailRefreshToken());
Expand All @@ -87,15 +84,12 @@ public void processHistorySync(String userEmail) {
List<EmailBatchItem> batchItems = collectMessages(service, historyResponse.getHistory());

if (!batchItems.isEmpty()) {

List<JobDTO> extractedJobs = geminiService.extractJobsFromBatch(batchItems);

log.info("Ingesting batch of {} emails via Gemini for {}", batchItems.size(), email);

jobService.saveBatchResults(email, batchItems, extractedJobs);
}
log.info("Ingesting batch of {} emails for {}", batchItems.size(), email);
List<JobDTO> extractedJobs = geminiService.extractJobsFromBatch(batchItems);
jobService.saveBatchResults(email, batchItems, extractedJobs);
}
} catch (Exception e) {
log.error("High-Performance Sync failed for {}: ", email, e);
log.error("Sync failed for {}: ", email, e);
} finally {
userRepository.releaseSyncLock(email);
cacheEvictService.evictAllForUser(email);
Expand All @@ -110,45 +104,105 @@ private List<EmailBatchItem> collectMessages(Gmail service, List<History> histor
if (history.getMessagesAdded() == null) continue;
for (HistoryMessageAdded added : history.getMessagesAdded()) {
try {
Message m = service.users().messages().get("me", added.getMessage().getId())
.setFormat("full").execute();

long millisecondTimestamp = m.getInternalDate();
LocalDateTime emailDate = LocalDateTime.ofInstant(
Instant.ofEpochMilli(millisecondTimestamp), ZoneOffset.UTC);
Message m = service.users().messages().get("me", added.getMessage().getId()).setFormat("full").execute();
LocalDateTime emailDate = LocalDateTime.ofInstant(Instant.ofEpochMilli(m.getInternalDate()), ZoneOffset.UTC);

String from = "", subj = "", replyTo="";
String from = "", subj = "", replyTo = "";
for (var h : m.getPayload().getHeaders()) {
if ("From".equalsIgnoreCase(h.getName())) from = h.getValue();
if ("Subject".equalsIgnoreCase(h.getName())) subj = h.getValue();
if ("Reply-To".equalsIgnoreCase(h.getName())) replyTo = h.getValue();
}

if (!isSystemNoise(subj)) {
String body = extractTextFromBody(m.getPayload());
String body = extractProcessedBody(m.getPayload());
items.add(new EmailBatchItem(from, subj, replyTo, body, emailDate));
}
} catch (Exception e) {
log.warn("Failed to fetch message {}: {}", added.getMessage().getId(), e.getMessage());
log.warn("Failed message fetch: {}", e.getMessage());
}
}
}
return items;
}

private String extractTextFromBody(MessagePart part) {
/**
 * Produces the cleaned text body for a Gmail message payload.
 *
 * <p>The payload is first flattened into a single raw string by
 * {@code recursiveRawCollect}, and that string is then passed through
 * {@code surgicalClean}.
 *
 * @param payload the message payload to read
 * @return the result of {@code surgicalClean} applied to the collected raw content
 */
private String extractProcessedBody(MessagePart payload) {
    final StringBuilder collected = new StringBuilder();
    recursiveRawCollect(payload, collected);
    return surgicalClean(collected.toString());
}

private void recursiveRawCollect(MessagePart part, StringBuilder buffer) {
if (part.getParts() != null) {
for (MessagePart subPart : part.getParts()) recursiveRawCollect(subPart, buffer);
}
if (part.getBody() != null && part.getBody().getData() != null) {
String content = new String(Base64.getUrlDecoder().decode(part.getBody().getData()));
if (part.getMimeType().contains("text/plain")) return content;
if (part.getMimeType().contains("text/html")) return content.replaceAll("<[^>]*>", " ");
buffer.append(new String(Base64.getUrlDecoder().decode(part.getBody().getData()))).append("\n");
}
if (part.getParts() != null) {
for (MessagePart subPart : part.getParts()) {
String text = extractTextFromBody(subPart);
if (text != null && !text.isBlank()) return text;
}

/** Matches an anchor tag; group 1 is the quoted href value, group 2 is the inner markup. */
private static final Pattern ANCHOR_TAG_PATTERN =
        Pattern.compile("(?is)<a\\s+[^>]*?href\\s*=\\s*[\"']([^\"']+)[\"'][^>]*?>(.*?)</a>");

/**
 * Reduces raw HTML to single-spaced plain text while keeping job-related links.
 *
 * <p>Steps, in order:
 * <ol>
 *   <li>{@code <style>} and {@code <script>} elements are removed with their content.</li>
 *   <li>Each anchor is replaced by its visible text. When the normalized href looks
 *       job-related it is emitted as
 *       {@code [LINK_START]text[LINK_URL]url[LINK_END]}; otherwise only the text is kept.</li>
 *   <li>Remaining tags and {@code &nbsp;} become spaces, and all whitespace runs
 *       (including the newlines produced for {@code <br>}) collapse to a single space.</li>
 * </ol>
 *
 * @param rawHtml the raw message content; may be {@code null}
 * @return the cleaned text, or an empty string when the input is {@code null} or blank
 */
private String surgicalClean(String rawHtml) {
    if (rawHtml == null || rawHtml.isBlank()) {
        return "";
    }

    String content = rawHtml.replaceAll("(?is)<style.*?>.*?</style>", "")
            .replaceAll("(?is)<script.*?>.*?</script>", "");

    StringBuilder sb = new StringBuilder();
    Matcher m = ANCHOR_TAG_PATTERN.matcher(content);

    int lastEnd = 0;
    while (m.find()) {
        // Copy the text between the previous anchor and this one unchanged.
        sb.append(content, lastEnd, m.start());

        String rawUrl = m.group(1).replace("&amp;", "&");
        String linkText = m.group(2).replaceAll("<[^>]*>", "").trim();

        String processedUrl = processUrlByDomain(rawUrl);

        // Compare on a lowercased copy so detection agrees with processUrlByDomain,
        // which also inspects a lowercased URL; the emitted URL keeps its original case.
        String probe = processedUrl.toLowerCase(java.util.Locale.ROOT);
        boolean isJobLink = probe.contains("viewjob")
                || probe.contains("confirmemail")
                || probe.contains("linkedin.com/jobs")
                // The /comm/ form is normalized by processUrlByDomain, so it must be tagged too.
                || probe.contains("linkedin.com/comm/jobs")
                || probe.contains("careers")
                || probe.contains("apply");

        if (isJobLink && processedUrl.length() > 15) {
            sb.append(" [LINK_START]").append(linkText)
                    .append("[LINK_URL]").append(processedUrl)
                    .append("[LINK_END] ");
        } else {
            sb.append(" ").append(linkText).append(" ");
        }

        lastEnd = m.end();
    }
    sb.append(content.substring(lastEnd));

    return sb.toString()
            .replaceAll("(?i)<br\\s*/?>", "\n")
            .replaceAll("(?i)</td>", " ")
            .replaceAll("<[^>]*>", " ")
            .replaceAll("&nbsp;", " ")
            .replaceAll("\\s+", " ")
            .trim();
}

/**
 * Normalizes a link target according to the site it points to.
 *
 * <ul>
 *   <li>LinkedIn job links ({@code linkedin.com/jobs} or {@code linkedin.com/comm/jobs}):
 *       the query string is dropped.</li>
 *   <li>Indeed links: returned unchanged.</li>
 *   <li>Any other URL containing {@code utm_} or {@code ref=}: query parameters whose
 *       name starts with {@code utm_}, and the {@code ref} parameter, are removed while
 *       the remaining parameters are kept.</li>
 *   <li>Everything else: returned unchanged.</li>
 * </ul>
 *
 * @param url the raw URL; may be {@code null}
 * @return the normalized URL, or an empty string when {@code url} is {@code null}
 */
private String processUrlByDomain(String url) {
    if (url == null) {
        return "";
    }
    // Locale.ROOT keeps domain detection stable regardless of the JVM default locale.
    String lowerUrl = url.toLowerCase(java.util.Locale.ROOT);

    if (lowerUrl.contains("linkedin.com/jobs") || lowerUrl.contains("linkedin.com/comm/jobs")) {
        int queryIndex = url.indexOf('?');
        return queryIndex > 0 ? url.substring(0, queryIndex) : url;
    }

    if (lowerUrl.contains("indeed.com")) {
        return url;
    }

    if (lowerUrl.contains("utm_") || lowerUrl.contains("ref=")) {
        return stripTrackingParams(url);
    }

    return url;
}

/**
 * Removes {@code utm_*} and {@code ref} query parameters from a URL without
 * disturbing the other parameters, the {@code ?} separator, or a trailing fragment.
 */
private static String stripTrackingParams(String url) {
    int queryStart = url.indexOf('?');
    if (queryStart < 0) {
        return url;
    }
    String base = url.substring(0, queryStart);
    String query = url.substring(queryStart + 1);

    String fragment = "";
    int fragmentStart = query.indexOf('#');
    if (fragmentStart >= 0) {
        fragment = query.substring(fragmentStart);
        query = query.substring(0, fragmentStart);
    }

    StringBuilder kept = new StringBuilder();
    for (String param : query.split("&")) {
        boolean tracking = param.regionMatches(true, 0, "utm_", 0, 4)
                || param.regionMatches(true, 0, "ref=", 0, 4);
        if (param.isEmpty() || tracking) {
            continue;
        }
        // Re-insert '?' before the first surviving parameter so the URL stays well-formed.
        kept.append(kept.length() == 0 ? '?' : '&').append(param);
    }
    return base + kept + fragment;
}

private void bootstrapUserHistory(Gmail service, User user) throws Exception {
Expand All @@ -164,13 +218,6 @@ private boolean isSystemNoise(String subject) {
return s.contains("security alert") || s.contains("sign-in") || s.contains("verification code");
}

// private void evictUserCaches(String email) {
// Cache userCache = cacheManager.getCache("users");
// Cache entityCache = cacheManager.getCache("userEntities");
// if (userCache != null) userCache.evict(email);
// if (entityCache != null) entityCache.evict(email);
// }

public String getFreshAccessToken(String refreshToken) throws Exception {
return new GoogleRefreshTokenRequest(GoogleNetHttpTransport.newTrustedTransport(), GsonFactory.getDefaultInstance(),
refreshToken, clientId, clientSecret).execute().getAccessToken();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ public void handleManualForward(String from, String subject, String body, String
if (user == null) return;

if (Boolean.TRUE.equals(user.getGmailConnected())) {
log.info("Discarding forwarded email for {}: Direct Sync is active.", userEmail);
log.warn("Discarding forwarded email for {}: Direct Sync is active.", userEmail);
return;
}

Expand All @@ -39,7 +39,6 @@ public void handleManualForward(String from, String subject, String body, String

if (job != null) {
jobService.createOrUpdateJob(job, userEmail);
log.info("Successfully ingested forwarded job: {} for {}", job.getCompany(), userEmail);
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -175,7 +175,7 @@ public void saveBatchResults(String email, List<EmailBatchItem> batchItems, List

List<List<String>> batchUrlLists = batchItems.parallelStream()
.map(item -> UrlParser.extractAndCleanUrls(item.body()))
.toList();
.toList();
for (JobDTO job : extractedJobs) {
Integer idx = job.getInputIndex();

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,30 +8,50 @@

/**
 * Static helpers for pulling URLs out of email text and trimming boilerplate
 * from email bodies.
 */
public class UrlParser {

    /**
     * Matches a run of URL characters introduced by {@code http://}, {@code https://}
     * or {@code www.}; the negative lookbehind stops the match from ending on
     * sentence punctuation.
     */
    private static final Pattern URL_PATTERN =
            Pattern.compile("(https?://|www\\.)[a-zA-Z0-9./?=&%_\\-+]+(?<![.,!?:;])");

    /** Matches a whole {@code <style>} element, including its content. */
    private static final Pattern STYLE_BLOCK = Pattern.compile("(?is)<style.*?>.*?</style>");

    /** Matches a whole {@code <script>} element, including its content. */
    private static final Pattern SCRIPT_BLOCK = Pattern.compile("(?is)<script.*?>.*?</script>");

    /** Phrases after which the rest of the body is discarded by {@link #trimNoise(String)}. */
    private static final String[] NOISE_MARKERS =
            {"View similar jobs", "Unsubscribe", "©", "Help Center", "References", "Privacy Policy"};

    /** Maximum number of characters returned by {@link #trimNoise(String)}. */
    private static final int MAX_BODY_LENGTH = 3000;

    /**
     * Finds every URL in {@code text}, normalizes each one and removes duplicates.
     *
     * @param text the text to scan; may be {@code null}
     * @return the distinct, non-blank normalized URLs in order of first appearance;
     *         an empty list when {@code text} is {@code null}
     */
    public static List<String> extractAndCleanUrls(String text) {
        if (text == null) {
            return List.of();
        }
        List<String> urls = new ArrayList<>();
        Matcher matcher = URL_PATTERN.matcher(text);
        while (matcher.find()) {
            urls.add(processUrlByDomain(matcher.group()));
        }
        return urls.stream()
                .filter(url -> !url.isBlank())
                .distinct()
                .collect(Collectors.toList());
    }

    /**
     * Normalizes a URL by site: Indeed URLs are kept as-is; LinkedIn URLs and URLs
     * carrying {@code utm_}, {@code ref=} or {@code trk=} lose their query string;
     * all other URLs are returned unchanged.
     */
    private static String processUrlByDomain(String url) {
        // Locale.ROOT keeps detection independent of the JVM default locale
        // (under a Turkish locale "LINKEDIN" would not lowercase to "linkedin").
        String lowerUrl = url.toLowerCase(java.util.Locale.ROOT);

        if (lowerUrl.contains("indeed.com")) {
            return url;
        }

        if (lowerUrl.contains("linkedin.com") || lowerUrl.contains("utm_")
                || lowerUrl.contains("ref=") || lowerUrl.contains("trk=")) {
            int qIndex = url.indexOf('?');
            return qIndex > 0 ? url.substring(0, qIndex) : url;
        }

        return url;
    }

    /**
     * Strips style/script elements from {@code body}, cuts it at each known footer
     * marker in turn, and caps the result at 3000 characters.
     *
     * <p>A marker found at index 0 does not truncate, so the result is never emptied
     * by a marker at the very start.
     *
     * @param body the email body; may be {@code null}
     * @return the trimmed body, or an empty string when {@code body} is {@code null}
     */
    public static String trimNoise(String body) {
        if (body == null) {
            return "";
        }

        String cleanBody = STYLE_BLOCK.matcher(body).replaceAll("");
        cleanBody = SCRIPT_BLOCK.matcher(cleanBody).replaceAll("");

        for (String marker : NOISE_MARKERS) {
            int index = cleanBody.indexOf(marker);
            if (index > 0) {
                cleanBody = cleanBody.substring(0, index);
            }
        }

        return cleanBody.length() > MAX_BODY_LENGTH
                ? cleanBody.substring(0, MAX_BODY_LENGTH)
                : cleanBody;
    }
}
7 changes: 7 additions & 0 deletions backend/src/main/resources/application-prod.properties
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,13 @@ spring.jpa.properties.hibernate.jdbc.batch_size=25
spring.jpa.properties.hibernate.order_inserts=true
spring.jpa.properties.hibernate.order_updates=true

# Logging Levels
logging.level.root=WARN
logging.level.org.springframework=WARN
logging.level.org.hibernate=WARN
logging.level.com.thughari.jobtrackerpro=WARN
logging.level.com.thughari.jobtrackerpro.scheduler=INFO

# Gemini AI
app.gemini.enabled=true
gemini.api.key=${GEMINI_API_KEY}
Expand Down
Loading