# clipScrape/videoScrape.py at master · sw882882/clipScrape · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
import pandas as pd
import datetime as dt

# Script: pick the most chat-active moments of a stream.
#
# Reads ./working/frequency.csv (columns "timestamp" as HH:MM:SS and "averaged"),
# greedily keeps the highest-"averaged" row, discards every other row within
# +/- `length` seconds of it, repeats until no rows remain, then keeps only the
# picks whose "averaged" is above the overall mean and writes them to
# ./working/filtered.csv.
print("Starting...")
print(
    """
This stream clip finder (or whatever you want to call it) works by finding times peak engagement
in chat. This, for the most part works pretty smoothly, but sometimes there can be overlapping
outputs due to the way it works sort of  "chunking" popular parts of the stream together. This
happens because the "chunk" is too long or too short. Currently it is set so it is set by the
user. So please enter the likely length of the clips/engaging moments in your target stream.
      """
)
# The prompt is in minutes; everything below works in seconds.
length = float(input("Length in Minutes: ")) * 60
# Load the data from a CSV file, assuming the timestamp column is named "timestamp"
print("processing csv generated previously...")
df = pd.read_csv("./working/frequency.csv")
# Mean over ALL rows, taken before any filtering; used as the final cut-off.
averageFrequency = df["averaged"].mean()

# Convert the timestamp column to datetime format
df["timestamp"] = pd.to_datetime(df["timestamp"], format="%H:%M:%S")

# Sort by frequency, highest first. The index is renumbered so that every row
# has a unique label that can be used to look the picked rows up again later.
df = df.sort_values(by="averaged", ascending=False).reset_index(drop=True)

# Untouched copy of the ranked table; `df` itself shrinks inside the loop.
ranked_df = df
# Half-width of the exclusion window around each picked peak.
window = dt.timedelta(seconds=length)
# Index labels (into ranked_df) of the rows chosen as peaks, in pick order.
peak_labels = []

# Loop until all rows have been selected or excluded
while len(df) > 0:
    # The first remaining row is the one with the highest frequency
    peak_time = df["timestamp"].iloc[0]
    peak_labels.append(df.index[0])

    # Rows within `length` seconds before or after the peak (bounds inclusive).
    # An interval comparison is used instead of matching against a
    # one-second grid, so windows that are not a whole number of seconds
    # still cover the neighbouring rows.
    covered = df["timestamp"].between(peak_time - window, peak_time + window)
    # Always consume the picked row itself so the loop is guaranteed to make
    # progress even for a degenerate (e.g. negative) window.
    covered.iloc[0] = True

    # Delete all rows in the time range from the working dataframe
    df = df[~covered]

# Build the result in one step from the ranked table; this keeps the original
# column dtypes (notably datetime64 for "timestamp") and also yields a
# correctly-shaped empty frame when the CSV had no rows.
selected_df = ranked_df.loc[peak_labels].reset_index(drop=True)

# Convert back to readable format
selected_df["timestamp"] = selected_df["timestamp"].dt.strftime("%H:%M:%S")

# Drop everything that is not strictly above the average
selected_df = selected_df[selected_df["averaged"] > averageFrequency]

# Print the selected dataframe
print("processing complete")
print("These are likely funny/engaging moments throughout the stream")
# Drop the first CSV column (presumably the index column written by the
# previous step - TODO confirm against the script that produces frequency.csv).
selected_df = selected_df.drop(df.columns[[0]], axis=1)
# reset_index returns a new frame; assign it so the renumbering takes effect.
selected_df = selected_df.reset_index(drop=True)
print(selected_df.to_string())
selected_df.to_csv("./working/filtered.csv")
print("the results have been saved in ./working/filtered.csv")

# TODO give user option to see certain clip from the dataframe

print("enter one of the indexes to play the clip in MPV (or enter q to quit)")

# TODO: loop until the user presses q, offering per-clip actions:
# 1 mpv
# 2 download
# 3 eventually nlp