3,417 Tweets

1,634 tweets scraped from the profiles of Theo, Dax, Ben Awad, Sahil Lavingia, Aiden Bai, and Guillermo Rauch, available at scripts/json-to-csv/tweets.json

// Run this script only in a web browser's console (it relies on the page's DOM).

/**
 * Browser-console tweet scraper.
 *
 * Scrolls the page automatically, collects every tweet rendered inside an
 * <article> element, and downloads the collected tweets as JSON files of
 * CHUNK_SIZE tweets each (tweets_1.json, tweets_2.json, ...).
 *
 * Exposed on `window`:
 *   - window.currentChunk   tweets collected since the last download
 *   - window.scrollInterval id of the auto-scroll timer
 *   - window.stopScroll()   stops scrolling and scraping, then downloads
 *                           whatever is still pending
 */
(() => {
  const CHUNK_SIZE = 100;

  // A count must start with a digit. Without that requirement the letter "k"
  // inside the word "like" is picked up as a value for tweets with no likes.
  // Labels are lower-cased before matching, so the suffix is "k" or "m".
  const COUNT_PATTERN = /\d+(?:[.,]\d+)*[km]?/;

  window.currentChunk = [];
  const scraped = new Set(); // "username::text" keys of tweets already collected
  let chunk = 1; // sequence number of the next full chunk file

  /** Triggers a browser download of `data` serialised as pretty-printed JSON. */
  const downloadJson = (data, filename) => {
    const blob = new Blob([JSON.stringify(data, null, 2)], { type: "application/json" });
    const a = document.createElement("a");
    a.href = URL.createObjectURL(blob);
    a.download = filename;
    a.click();
    URL.revokeObjectURL(a.href);
  };

  /**
   * Extracts the count from a lower-cased aria-label.
   * Returns the count as a string with thousands separators removed
   * (e.g. "1234", "1.2k"), or null when the label contains no number.
   */
  const parseCount = (label) => {
    const match = label.match(COUNT_PATTERN);
    return match ? match[0].replace(/,/g, "") : null;
  };

  /** Downloads the current chunk and starts a new, empty one. */
  const saveChunk = () => {
    const count = window.currentChunk.length;
    const filename = `tweets_${chunk++}.json`;
    downloadJson(window.currentChunk, filename);
    console.log(`Saved ${count} tweets as ${filename}`);
    window.currentChunk = [];
  };

  /** Scans all rendered <article> elements and collects tweets not seen before. */
  const extractTweets = () => {
    for (const article of document.querySelectorAll("article")) {
      const textEl = article.querySelector('div[data-testid="tweetText"]');
      const userEl = article.querySelector('div[dir="ltr"] > span');
      const statGroup = article.querySelector('div[role="group"]');
      if (!textEl || !userEl || !statGroup) continue;

      const stats = { replies: null, reposts: null, likes: null, views: null };
      for (const el of statGroup.querySelectorAll("[aria-label]")) {
        const label = el.getAttribute("aria-label")?.toLowerCase() || "";
        const value = parseCount(label);
        // "repl" covers both "reply" and "replies".
        if (label.includes("repl")) stats.replies = value;
        else if (label.includes("repost")) stats.reposts = value;
        else if (label.includes("like")) stats.likes = value;
        else if (label.includes("view")) stats.views = value;
      }

      const text = textEl.innerText?.trim();
      const username = userEl.innerText?.trim();
      if (!text || !username) continue;

      // The page re-renders tweets while scrolling, so the same tweet can
      // show up many times; only the first occurrence is kept.
      const id = `${username}::${text}`;
      if (scraped.has(id)) continue;

      window.currentChunk.push({ username, text, ...stats });
      scraped.add(id);
      console.log(`[${window.currentChunk.length}] @${username}: ${text}`);
      if (window.currentChunk.length >= CHUNK_SIZE) saveChunk();
    }
  };

  // Re-scan whenever the page adds or removes nodes (new tweets are loaded on scroll).
  const observer = new MutationObserver(() => extractTweets());
  observer.observe(document.body, { childList: true, subtree: true });

  window.scrollInterval = setInterval(() => window.scrollBy(0, 1000), 1500);

  /**
   * Stops auto-scrolling and scraping, then downloads the pending partial
   * chunk (if any). The chunk is cleared afterwards, so calling this again
   * does not download the same tweets twice.
   */
  window.stopScroll = () => {
    clearInterval(window.scrollInterval);
    observer.disconnect();

    const remaining = window.currentChunk.length;
    if (remaining > 0) {
      downloadJson(window.currentChunk, `tweets_final_${remaining}.json`);
      window.currentChunk = [];
      console.log("Final partial chunk saved.");
    } else {
      console.log("Stopped. No tweets left to save.");
    }
  };

  console.log(`Scraper started. Will auto-save every ${CHUNK_SIZE} tweets and flush memory each time.`);
})();

// Call this to stop the auto-scroll, for example once the
// page reaches the end of the profile. That is not
// necessarily the real end, but the point where further
// scrolling gets rate limited. Any collected tweets that
// have not been downloaded yet are saved as a final file.


stopScroll();

Name		Name	Last commit message	Last commit date
Latest commit History 4 Commits
dataset		dataset
models		models
raw_data		raw_data
scripts		scripts
.gitattributes		.gitattributes
LICENSE		LICENSE
README.md		README.md
requirements.txt		requirements.txt

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Repository files navigation

3,417 Tweets

About

Uh oh!

Releases

Packages

Uh oh!

Contributors

Uh oh!

Languages

Folders and files

Latest commit

History

Repository files navigation

3,417 Tweets

About

Resources

License

Uh oh!

Stars

Watchers

Forks

Releases

Packages 0

Uh oh!

Contributors

Uh oh!

Languages

Packages