Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
39 commits
Select commit Hold shift + click to select a range
552ae6d
Dockerize Paper Hacker News
mihaigalos Oct 19, 2020
94472bb
Set regenerating interval to 300s
mihaigalos Oct 19, 2020
847a277
Use refresh interval variable defaulting to 300 seconds
mihaigalos Oct 19, 2020
fbfb54f
Merge pull request #1 from mihaigalos/dockerize_paper_hacker_news
mihaigalos Oct 19, 2020
3712c1f
Fix TypeError [ERR_INVALID_URL]: Invalid URL: undefined
mihaigalos Oct 25, 2020
4405526
Add own repo because of upstream not maintained
mihaigalos Oct 25, 2020
0e91d02
Abstractify source
mihaigalos Oct 25, 2020
2c36c9c
Add lobste.rs as source, separate docker file
mihaigalos Oct 25, 2020
0fdfcdf
Add lobste.rs feed
mihaigalos Oct 25, 2020
ec1edb6
Reenable jobs
mihaigalos Oct 25, 2020
a8bc088
Reformat, remove superfluous
mihaigalos Oct 25, 2020
b7bf240
Improve naming
mihaigalos Oct 25, 2020
0d0e8a6
Remove superfluous
mihaigalos Oct 25, 2020
d851e86
Recursively delete all cached *.json files
mihaigalos Oct 25, 2020
2809f8e
Improve naming
mihaigalos Oct 25, 2020
2aa3fd3
Merge pull request #3 from mihaigalos/abstractify_source
mihaigalos Oct 25, 2020
d0fa8ad
feat: Use latest node
mihaigalos Feb 23, 2024
df234fc
fix: Install once we have the package.json
mihaigalos Feb 23, 2024
d65c89f
fix: Debug
mihaigalos Feb 23, 2024
3caa8b2
fix: Debug
mihaigalos Feb 23, 2024
5ff8b8f
fix: Debug
mihaigalos Feb 23, 2024
f67d5f0
fix: Debug
mihaigalos Feb 23, 2024
3309528
fix: Debug
mihaigalos Feb 23, 2024
ee89000
fix: Debug
mihaigalos Feb 23, 2024
68b268c
fix: Debug
mihaigalos Feb 23, 2024
cbd4c89
fix: Debug
mihaigalos Feb 23, 2024
d59bf88
fix: Debug
mihaigalos Feb 24, 2024
ea2789b
fix: Debug
mihaigalos Feb 24, 2024
73e3a92
fix: Debug
mihaigalos Feb 24, 2024
45b840e
feat: Cronjob for refreshing of cache
mihaigalos Feb 24, 2024
76536ed
feat: Timezone
mihaigalos Feb 24, 2024
04c4f21
feat: Cronjob for refreshing of cache
mihaigalos Feb 24, 2024
a85a8e6
feat: Docker - Harden
mihaigalos Feb 24, 2024
b1356f9
feat: Docker - Remove cron, cleanup in main loop
mihaigalos Feb 24, 2024
0ea46c2
chore: Docker - Improve verbosity
mihaigalos Feb 24, 2024
61e1f3f
chore: Docker - Improve verbosity
mihaigalos Feb 24, 2024
9448f62
chore: Docker - Allow date
mihaigalos Feb 24, 2024
5d6159d
chore: Logic - Update to fix breakage due to punycode
Oct 17, 2024
b6dbdb0
chore: Docker - Timeout generation
mihaigalos Mar 4, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 32 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
# Paper Hacker News: renders HN/lobste.rs front pages to static HTML,
# served by nginx on port 8080. The app runs as the unprivileged "user";
# root is used only for package installation and permission setup.
FROM node

RUN adduser user

USER user
WORKDIR /home/user
COPY . .
RUN yarn install --modules-folder=/home/user/node_modules
COPY /scripts/main_loop.sh .
COPY /scripts/harden.sh .

USER root
# Use apt-get (not apt) in scripts; clean the package lists afterwards
# to keep the image small.
RUN apt-get update \
 && DEBIAN_FRONTEND=noninteractive apt-get install --yes nginx \
 && rm -rf /var/lib/apt/lists/*

ENV TZ=Europe/Vienna
ENV LANG=en_US.UTF-8 \
    LANGUAGE=en_US.UTF-8

COPY nginx.conf /etc/nginx/nginx.conf
# nginx runs as "user", so it must own its runtime dirs, logs, the
# served directory, and the pid file.
RUN mkdir -p /var/lib/nginx \
 && chown -R user:user /var/lib/nginx /var/log/nginx . \
 && touch /run/nginx.pid \
 && chown user:user /run/nginx.pid

RUN ./harden.sh user
USER user

CMD /home/user/main_loop.sh
123 changes: 85 additions & 38 deletions bin/generate-html.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -6,50 +6,97 @@ import fs from 'fs-extra'
import psl from 'psl'
import html2text from 'html2plaintext'

/**
 * Parse command-line arguments from process.argv.
 *
 * Supported forms:
 *   --key=value  -> args.key = 'value' (value may itself contain '=')
 *   --key        -> args.key = true
 *   -abc         -> args.a = args.b = args.c = true
 *
 * @returns {Object.<string, (string|true)>} map of flag name to value
 */
function getArgs () {
  const args = {};
  for (const arg of process.argv.slice(2)) {
    // long arg
    if (arg.slice(0, 2) === '--') {
      // Split on the FIRST '=' only, so values such as
      // '--url=http://x?a=b' are not truncated at the second '='.
      const eq = arg.indexOf('=');
      if (eq === -1) {
        args[arg.slice(2)] = true;
      } else {
        args[arg.slice(2, eq)] = arg.slice(eq + 1);
      }
    }
    // flags: single dash followed by one-letter boolean flags
    else if (arg[0] === '-') {
      for (const flag of arg.slice(1)) {
        args[flag] = true;
      }
    }
  }
  return args;
}

;(async () => {
await fs.ensureDir('cache/hn/item')
await fs.ensureDir('cache/url')

await init_titles()


const args = getArgs();

const stories = []
for (const storyid of (await hnget('topstories')).slice(0, 30)) {
const story = await getitem(storyid)
if (story.type !== 'story') continue // Filter out jobs
story.keyword = title_keyword(story.title)
if (story.text) { // Self post
story.url = 'https://news.ycombinator.com/item?id='+story.id
story.domain = 'news.ycombinator.com'
story.paragraph = html2text(story.text.split('<p>')[0])
story.image = false
} else {
story.domain = psl.parse(new URL(story.url).hostname).domain
const info = await page_info(story.url)
story.paragraph = info.paragraph
story.image = info.image
}
stories.push(story)
}

const jobs = []
for (const jobid of (await hnget('jobstories')).slice(0, 3)) {
const job = await getitem(jobid)
job.domain = psl.parse(new URL(job.url).hostname).domain
const split = job.title.split(' ')
const splitix = (split.findIndex(w => w.toLowerCase() === 'hiring') || 3)+1
job.title1 = split.slice(0, splitix).join(' ')
job.title2 = split.slice(splitix).join(' ')
jobs.push(job)
const source_urls=['https://hacker-news.firebaseio.com/v0/']
const story_urls=['https://news.ycombinator.com/item?id=']
const story_domains=['news.ycombinator.com']
const cache_paths=['hacker-news']


for (var i=0; i<source_urls.length; i++){
var source_url=source_urls[i]
var story_url=story_urls[i]
var story_domain=story_domains[i]
var cache_path=args[cache_paths[i]] // i.e. pass --hacker-news=hn (dirty hack but I'm not a JS dev.)

await fs.ensureDir(`cache/${cache_path}/item`)
await fs.ensureDir('cache/url')
await init_titles(cache_path)
}


for (var i=0; i<source_urls.length; i++){
var source_url=source_urls[i]
var story_url=story_urls[i]
var story_domain=story_domains[i]
var cache_path=args[cache_paths[i]]

console.log("Cache path: " + cache_path);

var storyids=await hnget(source_url, 'topstories',cache_path)

for (var storyid of (storyids).slice(0, 30)) {
var story = await getitem(source_url, storyid, cache_path)
if (story.type !== 'story') continue // Filter out jobs
story.keyword = title_keyword(story.title)
if (story.text) { // Self post
story.url = story_url+story.id
story.domain = story_domain
story.paragraph = html2text(story.text.split('<p>')[0])
story.image = false
} else {
story.domain = psl.parse(new URL(story.url).hostname).domain
const info = await page_info(story.url)
story.paragraph = info.paragraph
story.image = info.image
}
stories.push(story)
}

const jobstories = await hnget(source_url, 'jobstories', cache_path)
if(jobstories != null)
{
for (const jobid of jobstories.slice(0, 3)) {
const job = await getitem(source_url, jobid, cache_path)
const split = job.title.split(' ')
const splitix = (split.findIndex(w => w.toLowerCase() === 'hiring') || 3)+1
job.title1 = split.slice(0, splitix).join(' ')
job.title2 = split.slice(splitix).join(' ')
jobs.push(job)
}
}
}

await fs.writeFile('index.html', pug.renderFile('index.pug', {
stories,
jobs,
date: new Date(1000*Math.max(...stories.map(s => s.time))).toLocaleString('en-US', {
timeZone: 'UTC',
dateStyle: 'full',
timeStyle: 'short',
timeZoneName: 'short',
}),
date: new Intl.DateTimeFormat('en', { dateStyle: "medium", timeStyle: "medium" }).format(new Date()),
}))
})().then(null, err => {throw err})
17 changes: 10 additions & 7 deletions lib/hn.mjs
Original file line number Diff line number Diff line change
@@ -1,20 +1,23 @@
import fetch from 'node-fetch'
import fs from 'fs-extra'

/**
 * Fetch `q` (e.g. 'topstories' or 'item/123') from a feed API, with a
 * JSON file cache under cache/<cache_path>/.
 *
 * @param {string} source - base API URL, e.g. 'https://hacker-news.firebaseio.com/v0/'
 * @param {string} q - resource path appended to `source` (without '.json')
 * @param {string} cache_path - cache subdirectory for this feed
 * @returns {Promise<*>} parsed JSON response, or the cached copy
 * @throws {Error} on a non-2xx HTTP status
 */
export async function hnget(source, q, cache_path) {
  const cache = `cache/${cache_path}/${q}.json`
  // Serve from cache if a previous run already fetched this resource.
  if (await fs.exists(cache)) {
    return fs.readJson(cache)
  }
  const url = `${source}${q}.json`

  const res = await fetch(url)
  if (!res.ok) {
    throw new Error(`HTTP error: ${res.status} for ${source}${q}.json`)
  }
  const ret = await res.json()
  await fs.writeJson(cache, ret)
  return ret
}

/**
 * Fetch a single story/job item by id via hnget, using the same
 * per-feed cache.
 *
 * @param {string} source - base API URL passed through to hnget
 * @param {(number|string)} itemid - item identifier
 * @param {string} cache_path - cache subdirectory for this feed
 * @returns {Promise<Object>} the item JSON
 */
export async function getitem(source, itemid, cache_path) {
  return await hnget(source, `item/${itemid}`, cache_path)
}
6 changes: 3 additions & 3 deletions lib/keywords.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -32,9 +32,9 @@ async function titles_file() {
}

let tokcount
export async function init_titles() {
export async function init_titles(cache_path) {
const titles = await titles_file()
for (const storyfile of await fs.readdir('cache/hn/item/')) {
for (const storyfile of await fs.readdir(`cache/${cache_path}/item/`)) {
const storyid = storyfile.split('.')[0]
const story = await getitem(storyid)
if (story.type !== 'story') {
Expand All @@ -43,7 +43,7 @@ export async function init_titles() {
titles[story.id] = story.title
}
await fs.writeJson('titles.json', titles)

tokcount = {}
for (const storyid in titles) {
const title = titles[storyid]
Expand Down
7 changes: 4 additions & 3 deletions lib/page-extract.mjs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import crypto from 'crypto'
import fetch from 'node-fetch'
import fs from 'fs-extra'
import cheerio from 'cheerio'
import * as cheerio from "cheerio"
import compare from 'compare-function'
import URLToolkit from 'url-toolkit'
import htmlEntities from 'html-entities'
Expand All @@ -20,7 +20,8 @@ async function gethtml(url) {
}
const res = await fetch(url)
if (!res.ok) {
throw new Error(`HTTP error: ${res.status} for ${url}`)
console.log(`HTTP error. Skipping: ${res.status} for ${url}`)
return ''
}
let ret
if (res.headers.get('Content-Type').startsWith('text/html')) {
Expand Down Expand Up @@ -132,7 +133,7 @@ export async function page_info(url) {
normalizeWhitespace: true,
}})
$('script').remove()

return {
paragraph: getParagraph($),
image: getImage(url, $),
Expand Down
40 changes: 40 additions & 0 deletions nginx.conf
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
# nginx configuration for serving the generated static front pages.
events {

}

http {
    # Pull in any additional configuration dropped into conf.d.
    include /etc/nginx/conf.d/*.conf;
    server {
        # Unprivileged port so nginx can run as a non-root user.
        listen 8080 default_server;
        listen [::]:8080 default_server;
        server_name _;
        #root /usr/share/nginx/html;

        # Load configuration files for the default server block.
        include /etc/nginx/default.d/*.conf;

        # Serve the generated index.html straight from the app user's home.
        location / {
            root /home/user;
            autoindex on;
        }
        # Liveness probe: static JSON, not logged.
        location = /health {
            access_log off;
            add_header 'Content-Type' 'application/json';
            return 200 '{"status":"OK"}';
        }
        # Readiness probe: same static response as /health.
        location = /ready {
            access_log off;
            add_header 'Content-Type' 'application/json';
            return 200 '{"status":"OK"}';
        }


        error_page 404 /404.html;
        location = /40x.html {
        }

        error_page 500 502 503 504 /50x.html;
        location = /50x.html {
        }
    }
}
1 change: 1 addition & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
"fs-extra": "^9.0.1",
"html-entities": "^1.3.1",
"html2plaintext": "^2.1.2",
"punycode": "^2.3.1",
"node-fetch": "^2.6.0",
"nodemon": "^2.0.4",
"psl": "^1.8.0",
Expand Down
Loading