diff --git a/backend/crawling/crawling_demo_jikhaeng.ipynb b/backend/crawling/crawling_demo_jikhaeng.ipynb index 1ee7d5b8e..82b9ca9f6 100644 --- a/backend/crawling/crawling_demo_jikhaeng.ipynb +++ b/backend/crawling/crawling_demo_jikhaeng.ipynb @@ -10,7 +10,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 2, "id": "e98fe0d3", "metadata": {}, "outputs": [ @@ -170,7 +170,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "id": "8f905112", "metadata": {}, "outputs": [ @@ -178,14 +178,52 @@ "name": "stdout", "output_type": "stream", "text": [ - "๐Ÿ–ผ๏ธ [2] ํ…์ŠคํŠธ ๋ฏธ์กด์žฌ โ†’ ์ด๋ฏธ์ง€ ์ €์žฅ ์™„๋ฃŒ: KT_๊ณต๊ณ 2.png\n", - "๐Ÿ–ผ๏ธ [3] ํ…์ŠคํŠธ ๋ฏธ์กด์žฌ โ†’ ์ด๋ฏธ์ง€ ์ €์žฅ ์™„๋ฃŒ: ๋‹ค์˜ฌํˆฌ์ž์ฆ๊ถŒ_๊ณต๊ณ 3.png\n", - "๐Ÿ–ผ๏ธ [4] ํ…์ŠคํŠธ ๋ฏธ์กด์žฌ โ†’ ์ด๋ฏธ์ง€ ์ €์žฅ ์™„๋ฃŒ: NHN ํด๋ผ์šฐ๋“œ_๊ณต๊ณ 4.png\n", - "๐Ÿ–ผ๏ธ [5] ํ…์ŠคํŠธ ๋ฏธ์กด์žฌ โ†’ ์ด๋ฏธ์ง€ ์ €์žฅ ์™„๋ฃŒ: ๋”์กด๋น„์ฆˆ์˜จ_๊ณต๊ณ 5.png\n", - "๐Ÿ–ผ๏ธ [6] ํ…์ŠคํŠธ ๋ฏธ์กด์žฌ โ†’ ์ด๋ฏธ์ง€ ์ €์žฅ ์™„๋ฃŒ: ์—”์”จ์†Œํ”„ํŠธ_๊ณต๊ณ 6.png\n", - "๐Ÿ–ผ๏ธ [7] ํ…์ŠคํŠธ ๋ฏธ์กด์žฌ โ†’ ์ด๋ฏธ์ง€ ์ €์žฅ ์™„๋ฃŒ: ์นด์นด์˜ค_๊ณต๊ณ 7.png\n", - "โœ… [8] ํ…์ŠคํŠธ ๊ธฐ๋ฐ˜ ์šฐ๋Œ€์‚ฌํ•ญ/์ž๊ฒฉ์š”๊ฑด ํฌ๋กค๋ง ์™„๋ฃŒ\n", - "โœ… [9] ํ…์ŠคํŠธ ๊ธฐ๋ฐ˜ ์šฐ๋Œ€์‚ฌํ•ญ/์ž๊ฒฉ์š”๊ฑด ํฌ๋กค๋ง ์™„๋ฃŒ\n", + "๐Ÿ–ผ๏ธ [2] ํ…์ŠคํŠธ ๋ฏธ์กด์žฌ โ†’ ์ด๋ฏธ์ง€ ์ €์žฅ ์™„๋ฃŒ: ์ฟ ํŒก_๊ณต๊ณ 2.png\n", + "โŒ [3] ์ด๋ฏธ์ง€ ํฌ๋กค๋ง ์‹คํŒจ: Message: no such element: Unable to locate element: {\"method\":\"xpath\",\"selector\":\"//*[@id=\"root\"]/main/div[2]/div[1]/div[1]/div[4]/img\"}\n", + " (Session info: chrome=137.0.7151.69); For documentation on this error, please visit: https://www.selenium.dev/documentation/webdriver/troubleshooting/errors#no-such-element-exception\n", + "Stacktrace:\n", + "0 chromedriver 0x000000010504a654 cxxbridge1$str$ptr + 2723108\n", + "1 chromedriver 0x00000001050428c8 cxxbridge1$str$ptr + 2690968\n", + "2 chromedriver 0x0000000104b96714 cxxbridge1$string$len + 90428\n", + "3 chromedriver 0x0000000104bdd7c0 cxxbridge1$string$len + 381416\n", + "4 chromedriver 0x0000000104c1ede8 cxxbridge1$string$len + 649232\n", + "5 chromedriver 0x0000000104bd19c8 cxxbridge1$string$len + 332784\n", + "6 chromedriver 0x000000010500e278 cxxbridge1$str$ptr + 2476360\n", + "7 chromedriver 0x000000010501150c cxxbridge1$str$ptr + 2489308\n", + "8 chromedriver 0x0000000104fefa64 cxxbridge1$str$ptr + 2351412\n", + "9 chromedriver 0x0000000105011d94 cxxbridge1$str$ptr + 2491492\n", + "10 chromedriver 0x0000000104fe0d58 cxxbridge1$str$ptr + 2290728\n", + "11 chromedriver 0x0000000105031d60 cxxbridge1$str$ptr + 2622512\n", + "12 chromedriver 0x0000000105031eec cxxbridge1$str$ptr + 2622908\n", + "13 chromedriver 0x0000000105042514 cxxbridge1$str$ptr + 2690020\n", + "14 libsystem_pthread.dylib 0x00000001901cec0c _pthread_start + 136\n", + "15 libsystem_pthread.dylib 0x00000001901c9b80 thread_start + 8\n", + "\n", + "โŒ [4] ์ด๋ฏธ์ง€ ํฌ๋กค๋ง ์‹คํŒจ: Message: no such element: Unable to locate element: {\"method\":\"xpath\",\"selector\":\"//*[@id=\"root\"]/main/div[2]/div[1]/div[1]/div[4]/img\"}\n", + " (Session info: chrome=137.0.7151.69); For documentation on this error, please visit: https://www.selenium.dev/documentation/webdriver/troubleshooting/errors#no-such-element-exception\n", + "Stacktrace:\n", + "0 chromedriver 0x000000010504a654 cxxbridge1$str$ptr + 2723108\n", + "1 chromedriver 0x00000001050428c8 cxxbridge1$str$ptr + 2690968\n", + "2 chromedriver 0x0000000104b96714 cxxbridge1$string$len + 90428\n", + "3 chromedriver 0x0000000104bdd7c0 cxxbridge1$string$len + 381416\n", + "4 chromedriver 0x0000000104c1ede8 cxxbridge1$string$len + 649232\n", + "5 chromedriver 0x0000000104bd19c8 cxxbridge1$string$len + 332784\n", + "6 chromedriver 0x000000010500e278 cxxbridge1$str$ptr + 2476360\n", + "7 chromedriver 0x000000010501150c cxxbridge1$str$ptr + 2489308\n", + "8 chromedriver 0x0000000104fefa64 cxxbridge1$str$ptr + 2351412\n", + "9 chromedriver 0x0000000105011d94 cxxbridge1$str$ptr + 2491492\n", + "10 chromedriver 0x0000000104fe0d58 cxxbridge1$str$ptr + 2290728\n", + "11 chromedriver 0x0000000105031d60 cxxbridge1$str$ptr + 2622512\n", + "12 chromedriver 0x0000000105031eec cxxbridge1$str$ptr + 2622908\n", + "13 chromedriver 0x0000000105042514 cxxbridge1$str$ptr + 2690020\n", + "14 libsystem_pthread.dylib 0x00000001901cec0c _pthread_start + 136\n", + "15 libsystem_pthread.dylib 0x00000001901c9b80 thread_start + 8\n", + "\n", + "๐Ÿ–ผ๏ธ [5] ํ…์ŠคํŠธ ๋ฏธ์กด์žฌ โ†’ ์ด๋ฏธ์ง€ ์ €์žฅ ์™„๋ฃŒ: ๋„ค์ด๋ฒ„ํด๋ผ์šฐ๋“œ_๊ณต๊ณ 5.png\n", + "๐Ÿ–ผ๏ธ [6] ํ…์ŠคํŠธ ๋ฏธ์กด์žฌ โ†’ ์ด๋ฏธ์ง€ ์ €์žฅ ์™„๋ฃŒ: ํด๋กœ๋ด‡_๊ณต๊ณ 6.png\n", + "๐Ÿ–ผ๏ธ [7] ํ…์ŠคํŠธ ๋ฏธ์กด์žฌ โ†’ ์ด๋ฏธ์ง€ ์ €์žฅ ์™„๋ฃŒ: ์ฝ”์›จ์ด_๊ณต๊ณ 7.png\n", + "๐Ÿ–ผ๏ธ [8] ํ…์ŠคํŠธ ๋ฏธ์กด์žฌ โ†’ ์ด๋ฏธ์ง€ ์ €์žฅ ์™„๋ฃŒ: NHN_๊ณต๊ณ 8.png\n", + "๐Ÿ–ผ๏ธ [9] ํ…์ŠคํŠธ ๋ฏธ์กด์žฌ โ†’ ์ด๋ฏธ์ง€ ์ €์žฅ ์™„๋ฃŒ: ๋„ค์ด๋ฒ„ํด๋ผ์šฐ๋“œ_๊ณต๊ณ 9.png\n", "โœ… [10] ํ…์ŠคํŠธ ๊ธฐ๋ฐ˜ ์šฐ๋Œ€์‚ฌํ•ญ/์ž๊ฒฉ์š”๊ฑด ํฌ๋กค๋ง ์™„๋ฃŒ\n", "โœ… ์—‘์…€ ์ €์žฅ ์™„๋ฃŒ\n" ] @@ -304,7 +342,203 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 3, + "id": "558f983c", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ํšŒ์‚ฌ๋ช…๊ฒฝ๋ ฅํ•™๋ ฅ๊ทผ๋ฌด์ง€์ง๊ตฐ์ด๋ฏธ์ง€๊ฒฝ๋กœ์šฐ๋Œ€์‚ฌํ•ญ์ž๊ฒฉ์š”๊ฑด
0์ฟ ํŒก์‹ ์ž…ํ•™์‚ฌ์„œ์šธ์„œ๋ฒ„ยท๋ฐฑ์—”๋“œ, ์•ˆ๋“œ๋กœ์ด๋“œ, iOSdownloads/์ฟ ํŒก_๊ณต๊ณ 2.png
1ํŒ€๋ฆฌ๋ถ€๋œจ3~8๋…„์ฐจํ•™๋ ฅ ๋ฌด๊ด€๊ธฐํƒ€์„œ๋ฒ„ยท๋ฐฑ์—”๋“œ, DevOpsยทSRENaNNaNNaN
2ํŒ€๋ฆฌ๋ถ€๋œจ2~8๋…„์ฐจํ•™๋ ฅ ๋ฌด๊ด€์„œ์šธ์„œ๋ฒ„ยท๋ฐฑ์—”๋“œNaNNaNNaN
3๋„ค์ด๋ฒ„ํด๋ผ์šฐ๋“œ๊ฒฝ๋ ฅ ๋ฌด๊ด€ํ•™์‚ฌ๊ฒฝ๊ธฐ๊ธฐํƒ€๊ต์œก, ์„œ๋ฒ„ยท๋ฐฑ์—”๋“œdownloads/๋„ค์ด๋ฒ„ํด๋ผ์šฐ๋“œ_๊ณต๊ณ 5.png
4ํด๋กœ๋ด‡10๋…„์ฐจ ์ด์ƒํ•™๋ ฅ ๋ฌด๊ด€์„œ์šธ๊ธฐํƒ€์—”์ง€๋‹ˆ์–ด๋งยทR&D, ๋กœ๋ด‡SW, ์„œ๋ฒ„ยท๋ฐฑ์—”๋“œ, ํ”„๋ก ํŠธ์—”๋“œdownloads/ํด๋กœ๋ด‡_๊ณต๊ณ 6.png
5์ฝ”์›จ์ด7๋…„์ฐจ ์ด์ƒํ•™๋ ฅ ๋ฌด๊ด€์„œ์šธ์„œ๋ฒ„ยท๋ฐฑ์—”๋“œ, ์‚ฌ๋ฌผ์ธํ„ฐ๋„ท(IoT), DevOpsยทSREdownloads/์ฝ”์›จ์ด_๊ณต๊ณ 7.png
6NHN5๋…„์ฐจ ์ด์ƒํ•™๋ ฅ ๋ฌด๊ด€๊ฒฝ๊ธฐ์„œ๋ฒ„ยท๋ฐฑ์—”๋“œ, ์†Œํ”„ํŠธ์›จ์–ด ์—”์ง€๋‹ˆ์–ด, ๊ฒŒ์ž„๊ฐœ๋ฐœ(์„œ๋ฒ„)downloads/NHN_๊ณต๊ณ 8.png
7๋„ค์ด๋ฒ„ํด๋ผ์šฐ๋“œ3๋…„์ฐจ, 10๋…„์ฐจํ•™๋ ฅ ๋ฌด๊ด€๊ฒฝ๊ธฐ์•ˆ๋“œ๋กœ์ด๋“œ, iOS, ํ”„๋ก ํŠธ์—”๋“œ, ์„œ๋ฒ„ยท๋ฐฑ์—”๋“œdownloads/๋„ค์ด๋ฒ„ํด๋ผ์šฐ๋“œ_๊ณต๊ณ 9.png
8์•กํ‹ฐ๋ถ€ํ‚ค3~10๋…„์ฐจํ•™๋ ฅ ๋ฌด๊ด€์ „๋ถ์„œ๋ฒ„ยท๋ฐฑ์—”๋“œ, ๋ฐฑ์—”๋“œ, ์›นํ’€์Šคํƒโ€ข ์ง€๋„ ๊ด€๋ จ ๊ฐœ๋ฐœ ๊ฒฝํ—˜์ž\\nโ€ข ๊ฒฐ์ œ ๊ด€๋ จ ๊ฐœ๋ฐœ ๊ฒฝํ—˜์ž\\nโ€ข B2C ํ”Œ๋žซํผ ๊ฐœ๋ฐœ ๊ฒฝ...โ€ข ๊ฐœ๋ฐœ 3๋…„ ์ด์ƒ ๊ฒฝ๋ ฅ์˜ ์ค‘๊ธ‰๊ฐœ๋ฐœ์ž\\nโ€ข IntelliJ ๊ฐ€๋Šฅ\\nโ€ข Spring ...
\n", + "
" + ], + "text/plain": [ + " ํšŒ์‚ฌ๋ช… ๊ฒฝ๋ ฅ ํ•™๋ ฅ ๊ทผ๋ฌด์ง€ ์ง๊ตฐ \\\n", + "0 ์ฟ ํŒก ์‹ ์ž… ํ•™์‚ฌ ์„œ์šธ ์„œ๋ฒ„ยท๋ฐฑ์—”๋“œ, ์•ˆ๋“œ๋กœ์ด๋“œ, iOS \n", + "1 ํŒ€๋ฆฌ๋ถ€๋œจ 3~8๋…„์ฐจ ํ•™๋ ฅ ๋ฌด๊ด€ ๊ธฐํƒ€ ์„œ๋ฒ„ยท๋ฐฑ์—”๋“œ, DevOpsยทSRE \n", + "2 ํŒ€๋ฆฌ๋ถ€๋œจ 2~8๋…„์ฐจ ํ•™๋ ฅ ๋ฌด๊ด€ ์„œ์šธ ์„œ๋ฒ„ยท๋ฐฑ์—”๋“œ \n", + "3 ๋„ค์ด๋ฒ„ํด๋ผ์šฐ๋“œ ๊ฒฝ๋ ฅ ๋ฌด๊ด€ ํ•™์‚ฌ ๊ฒฝ๊ธฐ ๊ธฐํƒ€๊ต์œก, ์„œ๋ฒ„ยท๋ฐฑ์—”๋“œ \n", + "4 ํด๋กœ๋ด‡ 10๋…„์ฐจ ์ด์ƒ ํ•™๋ ฅ ๋ฌด๊ด€ ์„œ์šธ ๊ธฐํƒ€์—”์ง€๋‹ˆ์–ด๋งยทR&D, ๋กœ๋ด‡SW, ์„œ๋ฒ„ยท๋ฐฑ์—”๋“œ, ํ”„๋ก ํŠธ์—”๋“œ \n", + "5 ์ฝ”์›จ์ด 7๋…„์ฐจ ์ด์ƒ ํ•™๋ ฅ ๋ฌด๊ด€ ์„œ์šธ ์„œ๋ฒ„ยท๋ฐฑ์—”๋“œ, ์‚ฌ๋ฌผ์ธํ„ฐ๋„ท(IoT), DevOpsยทSRE \n", + "6 NHN 5๋…„์ฐจ ์ด์ƒ ํ•™๋ ฅ ๋ฌด๊ด€ ๊ฒฝ๊ธฐ ์„œ๋ฒ„ยท๋ฐฑ์—”๋“œ, ์†Œํ”„ํŠธ์›จ์–ด ์—”์ง€๋‹ˆ์–ด, ๊ฒŒ์ž„๊ฐœ๋ฐœ(์„œ๋ฒ„) \n", + "7 ๋„ค์ด๋ฒ„ํด๋ผ์šฐ๋“œ 3๋…„์ฐจ, 10๋…„์ฐจ ํ•™๋ ฅ ๋ฌด๊ด€ ๊ฒฝ๊ธฐ ์•ˆ๋“œ๋กœ์ด๋“œ, iOS, ํ”„๋ก ํŠธ์—”๋“œ, ์„œ๋ฒ„ยท๋ฐฑ์—”๋“œ \n", + "8 ์•กํ‹ฐ๋ถ€ํ‚ค 3~10๋…„์ฐจ ํ•™๋ ฅ ๋ฌด๊ด€ ์ „๋ถ ์„œ๋ฒ„ยท๋ฐฑ์—”๋“œ, ๋ฐฑ์—”๋“œ, ์›นํ’€์Šคํƒ \n", + "\n", + " ์ด๋ฏธ์ง€๊ฒฝ๋กœ \\\n", + "0 downloads/์ฟ ํŒก_๊ณต๊ณ 2.png \n", + "1 NaN \n", + "2 NaN \n", + "3 downloads/๋„ค์ด๋ฒ„ํด๋ผ์šฐ๋“œ_๊ณต๊ณ 5.png \n", + "4 downloads/ํด๋กœ๋ด‡_๊ณต๊ณ 6.png \n", + "5 downloads/์ฝ”์›จ์ด_๊ณต๊ณ 7.png \n", + "6 downloads/NHN_๊ณต๊ณ 8.png \n", + "7 downloads/๋„ค์ด๋ฒ„ํด๋ผ์šฐ๋“œ_๊ณต๊ณ 9.png \n", + "8 \n", + "\n", + " ์šฐ๋Œ€์‚ฌํ•ญ \\\n", + "0 \n", + "1 NaN \n", + "2 NaN \n", + "3 \n", + "4 \n", + "5 \n", + "6 \n", + "7 \n", + "8 โ€ข ์ง€๋„ ๊ด€๋ จ ๊ฐœ๋ฐœ ๊ฒฝํ—˜์ž\\nโ€ข ๊ฒฐ์ œ ๊ด€๋ จ ๊ฐœ๋ฐœ ๊ฒฝํ—˜์ž\\nโ€ข B2C ํ”Œ๋žซํผ ๊ฐœ๋ฐœ ๊ฒฝ... \n", + "\n", + " ์ž๊ฒฉ์š”๊ฑด \n", + "0 \n", + "1 NaN \n", + "2 NaN \n", + "3 \n", + "4 \n", + "5 \n", + "6 \n", + "7 \n", + "8 โ€ข ๊ฐœ๋ฐœ 3๋…„ ์ด์ƒ ๊ฒฝ๋ ฅ์˜ ์ค‘๊ธ‰๊ฐœ๋ฐœ์ž\\nโ€ข IntelliJ ๊ฐ€๋Šฅ\\nโ€ข Spring ... " + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df" + ] + }, + { + "cell_type": "code", + "execution_count": 4, "id": "d8af31f5", "metadata": {}, "outputs": [ @@ -321,11 +555,11 @@ "๐Ÿ”™ ๊ธฐ์กด ํƒญ ๋ณต๊ท€ ์™„๋ฃŒ\n", "\n", "โ–ถ๏ธ [3]๋ฒˆ์งธ ๊ณต๊ณ  ํด๋ฆญ โ†’ ์ƒˆ ํƒญ ์—ด๋ฆผ ์˜ˆ์ƒ\n", - "๐Ÿ–ผ๏ธ ์ด๋ฏธ์ง€ URL ์ €์žฅ ์™„๋ฃŒ\n", + "โŒ ์ด๋ฏธ์ง€ URL ์ €์žฅ ์‹คํŒจ\n", "๐Ÿ”™ ๊ธฐ์กด ํƒญ ๋ณต๊ท€ ์™„๋ฃŒ\n", "\n", "โ–ถ๏ธ [4]๋ฒˆ์งธ ๊ณต๊ณ  ํด๋ฆญ โ†’ ์ƒˆ ํƒญ ์—ด๋ฆผ ์˜ˆ์ƒ\n", - "๐Ÿ–ผ๏ธ ์ด๋ฏธ์ง€ URL ์ €์žฅ ์™„๋ฃŒ\n", + "โŒ ์ด๋ฏธ์ง€ URL ์ €์žฅ ์‹คํŒจ\n", "๐Ÿ”™ ๊ธฐ์กด ํƒญ ๋ณต๊ท€ ์™„๋ฃŒ\n", "\n", "โ–ถ๏ธ [5]๋ฒˆ์งธ ๊ณต๊ณ  ํด๋ฆญ โ†’ ์ƒˆ ํƒญ ์—ด๋ฆผ ์˜ˆ์ƒ\n", @@ -341,18 +575,16 @@ "๐Ÿ”™ ๊ธฐ์กด ํƒญ ๋ณต๊ท€ ์™„๋ฃŒ\n", "\n", "โ–ถ๏ธ [8]๋ฒˆ์งธ ๊ณต๊ณ  ํด๋ฆญ โ†’ ์ƒˆ ํƒญ ์—ด๋ฆผ ์˜ˆ์ƒ\n", - "โœ… ์šฐ๋Œ€์‚ฌํ•ญ ํฌ๋กค๋ง ์™„๋ฃŒ: โ€ข ํด๋ผ์šฐ๋“œ ํ™˜๊ฒฝ(...\n", - "โœ… ์ž๊ฒฉ์š”๊ฑด ํฌ๋กค๋ง ์™„๋ฃŒ: โ€ข ์ปดํ“จํ„ฐ ๊ณตํ•™ ๋˜...\n", + "๐Ÿ–ผ๏ธ ์ด๋ฏธ์ง€ URL ์ €์žฅ ์™„๋ฃŒ\n", "๐Ÿ”™ ๊ธฐ์กด ํƒญ ๋ณต๊ท€ ์™„๋ฃŒ\n", "\n", "โ–ถ๏ธ [9]๋ฒˆ์งธ ๊ณต๊ณ  ํด๋ฆญ โ†’ ์ƒˆ ํƒญ ์—ด๋ฆผ ์˜ˆ์ƒ\n", - "โœ… ์šฐ๋Œ€์‚ฌํ•ญ ํฌ๋กค๋ง ์™„๋ฃŒ: โ€ข ์ด์ปค๋จธ์Šค ์—…๊ณ„ ...\n", - "โœ… ์ž๊ฒฉ์š”๊ฑด ํฌ๋กค๋ง ์™„๋ฃŒ: โ€ข Python์— ...\n", + "๐Ÿ–ผ๏ธ ์ด๋ฏธ์ง€ URL ์ €์žฅ ์™„๋ฃŒ\n", "๐Ÿ”™ ๊ธฐ์กด ํƒญ ๋ณต๊ท€ ์™„๋ฃŒ\n", "\n", "โ–ถ๏ธ [10]๋ฒˆ์งธ ๊ณต๊ณ  ํด๋ฆญ โ†’ ์ƒˆ ํƒญ ์—ด๋ฆผ ์˜ˆ์ƒ\n", - "โœ… ์šฐ๋Œ€์‚ฌํ•ญ ํฌ๋กค๋ง ์™„๋ฃŒ: โ€ข ๋‹ค๊ตญ์–ด ์„œ๋น„์Šค์˜...\n", - "โœ… ์ž๊ฒฉ์š”๊ฑด ํฌ๋กค๋ง ์™„๋ฃŒ: โ€ข ์ตœ์†Œ ๋งŒ 5๋…„ ...\n", + "โœ… ์šฐ๋Œ€์‚ฌํ•ญ ํฌ๋กค๋ง ์™„๋ฃŒ: โ€ข ์ง€๋„ ๊ด€๋ จ ๊ฐœ๋ฐœ...\n", + "โœ… ์ž๊ฒฉ์š”๊ฑด ํฌ๋กค๋ง ์™„๋ฃŒ: โ€ข ๊ฐœ๋ฐœ 3๋…„ ์ด์ƒ...\n", "๐Ÿ”™ ๊ธฐ์กด ํƒญ ๋ณต๊ท€ ์™„๋ฃŒ\n", "\n", "โœ… ์—‘์…€ ์ €์žฅ ์™„๋ฃŒ\n" @@ -471,16 +703,204 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "id": "fa236770", "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ํšŒ์‚ฌ๋ช…๊ฒฝ๋ ฅํ•™๋ ฅ๊ทผ๋ฌด์ง€์ง๊ตฐ์šฐ๋Œ€์‚ฌํ•ญ์ž๊ฒฉ์š”๊ฑด์ด๋ฏธ์ง€๊ฒฝ๋กœ
0์ฟ ํŒก์‹ ์ž…ํ•™์‚ฌ์„œ์šธ์„œ๋ฒ„ยท๋ฐฑ์—”๋“œhttps://d2juy7qzamcf56.cloudfront.net/2025-06-...
1ํŒ€๋ฆฌ๋ถ€๋œจ3~8๋…„์ฐจํ•™๋ ฅ ๋ฌด๊ด€๊ธฐํƒ€์„œ๋ฒ„ยท๋ฐฑ์—”๋“œ
2ํŒ€๋ฆฌ๋ถ€๋œจ2~8๋…„์ฐจํ•™๋ ฅ ๋ฌด๊ด€์„œ์šธ์„œ๋ฒ„ยท๋ฐฑ์—”๋“œ
3๋„ค์ด๋ฒ„ํด๋ผ์šฐ๋“œ๊ฒฝ๋ ฅ ๋ฌด๊ด€ํ•™์‚ฌ๊ฒฝ๊ธฐ์„œ๋ฒ„ยท๋ฐฑ์—”๋“œhttps://d2juy7qzamcf56.cloudfront.net/2025-06-...
4ํด๋กœ๋ด‡10๋…„์ฐจ ์ด์ƒํ•™๋ ฅ ๋ฌด๊ด€์„œ์šธ์„œ๋ฒ„ยท๋ฐฑ์—”๋“œhttps://d2juy7qzamcf56.cloudfront.net/2025-06-...
5์ฝ”์›จ์ด7๋…„์ฐจ ์ด์ƒํ•™๋ ฅ ๋ฌด๊ด€์„œ์šธ์„œ๋ฒ„ยท๋ฐฑ์—”๋“œhttps://d2juy7qzamcf56.cloudfront.net/2025-06-...
6NHN5๋…„์ฐจ ์ด์ƒํ•™๋ ฅ ๋ฌด๊ด€๊ฒฝ๊ธฐ์„œ๋ฒ„ยท๋ฐฑ์—”๋“œhttps://d2juy7qzamcf56.cloudfront.net/2025-06-...
7๋„ค์ด๋ฒ„ํด๋ผ์šฐ๋“œ3๋…„์ฐจ, 10๋…„์ฐจํ•™๋ ฅ ๋ฌด๊ด€๊ฒฝ๊ธฐ์„œ๋ฒ„ยท๋ฐฑ์—”๋“œhttps://d2juy7qzamcf56.cloudfront.net/2025-06-...
8์•กํ‹ฐ๋ถ€ํ‚ค3~10๋…„์ฐจํ•™๋ ฅ ๋ฌด๊ด€์ „๋ถ์„œ๋ฒ„ยท๋ฐฑ์—”๋“œโ€ข ์ง€๋„ ๊ด€๋ จ ๊ฐœ๋ฐœ ๊ฒฝํ—˜์ž\\nโ€ข ๊ฒฐ์ œ ๊ด€๋ จ ๊ฐœ๋ฐœ ๊ฒฝํ—˜์ž\\nโ€ข B2C ํ”Œ๋žซํผ ๊ฐœ๋ฐœ ๊ฒฝ...โ€ข ๊ฐœ๋ฐœ 3๋…„ ์ด์ƒ ๊ฒฝ๋ ฅ์˜ ์ค‘๊ธ‰๊ฐœ๋ฐœ์ž\\nโ€ข IntelliJ ๊ฐ€๋Šฅ\\nโ€ข Spring ...
\n", + "
" + ], + "text/plain": [ + " ํšŒ์‚ฌ๋ช… ๊ฒฝ๋ ฅ ํ•™๋ ฅ ๊ทผ๋ฌด์ง€ ์ง๊ตฐ \\\n", + "0 ์ฟ ํŒก ์‹ ์ž… ํ•™์‚ฌ ์„œ์šธ ์„œ๋ฒ„ยท๋ฐฑ์—”๋“œ \n", + "1 ํŒ€๋ฆฌ๋ถ€๋œจ 3~8๋…„์ฐจ ํ•™๋ ฅ ๋ฌด๊ด€ ๊ธฐํƒ€ ์„œ๋ฒ„ยท๋ฐฑ์—”๋“œ \n", + "2 ํŒ€๋ฆฌ๋ถ€๋œจ 2~8๋…„์ฐจ ํ•™๋ ฅ ๋ฌด๊ด€ ์„œ์šธ ์„œ๋ฒ„ยท๋ฐฑ์—”๋“œ \n", + "3 ๋„ค์ด๋ฒ„ํด๋ผ์šฐ๋“œ ๊ฒฝ๋ ฅ ๋ฌด๊ด€ ํ•™์‚ฌ ๊ฒฝ๊ธฐ ์„œ๋ฒ„ยท๋ฐฑ์—”๋“œ \n", + "4 ํด๋กœ๋ด‡ 10๋…„์ฐจ ์ด์ƒ ํ•™๋ ฅ ๋ฌด๊ด€ ์„œ์šธ ์„œ๋ฒ„ยท๋ฐฑ์—”๋“œ \n", + "5 ์ฝ”์›จ์ด 7๋…„์ฐจ ์ด์ƒ ํ•™๋ ฅ ๋ฌด๊ด€ ์„œ์šธ ์„œ๋ฒ„ยท๋ฐฑ์—”๋“œ \n", + "6 NHN 5๋…„์ฐจ ์ด์ƒ ํ•™๋ ฅ ๋ฌด๊ด€ ๊ฒฝ๊ธฐ ์„œ๋ฒ„ยท๋ฐฑ์—”๋“œ \n", + "7 ๋„ค์ด๋ฒ„ํด๋ผ์šฐ๋“œ 3๋…„์ฐจ, 10๋…„์ฐจ ํ•™๋ ฅ ๋ฌด๊ด€ ๊ฒฝ๊ธฐ ์„œ๋ฒ„ยท๋ฐฑ์—”๋“œ \n", + "8 ์•กํ‹ฐ๋ถ€ํ‚ค 3~10๋…„์ฐจ ํ•™๋ ฅ ๋ฌด๊ด€ ์ „๋ถ ์„œ๋ฒ„ยท๋ฐฑ์—”๋“œ \n", + "\n", + " ์šฐ๋Œ€์‚ฌํ•ญ \\\n", + "0 \n", + "1 \n", + "2 \n", + "3 \n", + "4 \n", + "5 \n", + "6 \n", + "7 \n", + "8 โ€ข ์ง€๋„ ๊ด€๋ จ ๊ฐœ๋ฐœ ๊ฒฝํ—˜์ž\\nโ€ข ๊ฒฐ์ œ ๊ด€๋ จ ๊ฐœ๋ฐœ ๊ฒฝํ—˜์ž\\nโ€ข B2C ํ”Œ๋žซํผ ๊ฐœ๋ฐœ ๊ฒฝ... \n", + "\n", + " ์ž๊ฒฉ์š”๊ฑด \\\n", + "0 \n", + "1 \n", + "2 \n", + "3 \n", + "4 \n", + "5 \n", + "6 \n", + "7 \n", + "8 โ€ข ๊ฐœ๋ฐœ 3๋…„ ์ด์ƒ ๊ฒฝ๋ ฅ์˜ ์ค‘๊ธ‰๊ฐœ๋ฐœ์ž\\nโ€ข IntelliJ ๊ฐ€๋Šฅ\\nโ€ข Spring ... \n", + "\n", + " ์ด๋ฏธ์ง€๊ฒฝ๋กœ \n", + "0 https://d2juy7qzamcf56.cloudfront.net/2025-06-... \n", + "1 \n", + "2 \n", + "3 https://d2juy7qzamcf56.cloudfront.net/2025-06-... \n", + "4 https://d2juy7qzamcf56.cloudfront.net/2025-06-... \n", + "5 https://d2juy7qzamcf56.cloudfront.net/2025-06-... \n", + "6 https://d2juy7qzamcf56.cloudfront.net/2025-06-... \n", + "7 https://d2juy7qzamcf56.cloudfront.net/2025-06-... \n", + "8 " + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df" + ] } ], "metadata": { "kernelspec": { - "display_name": "hoenv", + "display_name": "python313", "language": "python", "name": "python3" }, diff --git "a/backend/crawling/downloads/NHN_\352\263\265\352\263\2408.png" "b/backend/crawling/downloads/NHN_\352\263\265\352\263\2408.png" new file mode 100644 index 000000000..7ce51dc4b Binary files /dev/null and "b/backend/crawling/downloads/NHN_\352\263\265\352\263\2408.png" differ diff --git "a/backend/crawling/downloads/\353\204\244\354\235\264\353\262\204\355\201\264\353\235\274\354\232\260\353\223\234_\352\263\265\352\263\2405.png" "b/backend/crawling/downloads/\353\204\244\354\235\264\353\262\204\355\201\264\353\235\274\354\232\260\353\223\234_\352\263\265\352\263\2405.png" new file mode 100644 index 000000000..5422dbdad Binary files /dev/null and "b/backend/crawling/downloads/\353\204\244\354\235\264\353\262\204\355\201\264\353\235\274\354\232\260\353\223\234_\352\263\265\352\263\2405.png" differ diff --git "a/backend/crawling/downloads/\353\204\244\354\235\264\353\262\204\355\201\264\353\235\274\354\232\260\353\223\234_\352\263\265\352\263\2409.png" "b/backend/crawling/downloads/\353\204\244\354\235\264\353\262\204\355\201\264\353\235\274\354\232\260\353\223\234_\352\263\265\352\263\2409.png" new file mode 100644 index 000000000..6f7084ded Binary files /dev/null and "b/backend/crawling/downloads/\353\204\244\354\235\264\353\262\204\355\201\264\353\235\274\354\232\260\353\223\234_\352\263\265\352\263\2409.png" differ diff --git "a/backend/crawling/downloads/\354\275\224\354\233\250\354\235\264_\352\263\265\352\263\2407.png" "b/backend/crawling/downloads/\354\275\224\354\233\250\354\235\264_\352\263\265\352\263\2407.png" new file mode 100644 index 000000000..161c3587c Binary files /dev/null and "b/backend/crawling/downloads/\354\275\224\354\233\250\354\235\264_\352\263\265\352\263\2407.png" differ diff --git "a/backend/crawling/downloads/\354\277\240\355\214\241_\352\263\265\352\263\2402.png" "b/backend/crawling/downloads/\354\277\240\355\214\241_\352\263\265\352\263\2402.png" new file mode 100644 index 000000000..14b427300 Binary files /dev/null and "b/backend/crawling/downloads/\354\277\240\355\214\241_\352\263\265\352\263\2402.png" differ diff --git "a/backend/crawling/downloads/\355\201\264\353\241\234\353\264\207_\352\263\265\352\263\2406.png" "b/backend/crawling/downloads/\355\201\264\353\241\234\353\264\207_\352\263\265\352\263\2406.png" new file mode 100644 index 000000000..90bb74031 Binary files /dev/null and "b/backend/crawling/downloads/\355\201\264\353\241\234\353\264\207_\352\263\265\352\263\2406.png" differ diff --git "a/backend/crawling/\354\247\201\355\226\211_\355\201\254\353\241\244\353\247\201_\352\262\260\352\263\274.xlsx" "b/backend/crawling/\354\247\201\355\226\211_\355\201\254\353\241\244\353\247\201_\352\262\260\352\263\274.xlsx" new file mode 100644 index 000000000..23ae1e6c0 Binary files /dev/null and "b/backend/crawling/\354\247\201\355\226\211_\355\201\254\353\241\244\353\247\201_\352\262\260\352\263\274.xlsx" differ diff --git "a/backend/crawling/\354\261\204\354\232\251\352\263\265\352\263\240_\355\201\254\353\241\244\353\247\201\352\262\260\352\263\274.xlsx" "b/backend/crawling/\354\261\204\354\232\251\352\263\265\352\263\240_\355\201\254\353\241\244\353\247\201\352\262\260\352\263\274.xlsx" new file mode 100644 index 000000000..62c298565 Binary files /dev/null and "b/backend/crawling/\354\261\204\354\232\251\352\263\265\352\263\240_\355\201\254\353\241\244\353\247\201\352\262\260\352\263\274.xlsx" differ diff --git a/backend/test_crawl/applicant_crawler.py b/backend/test_crawl/applicant_crawler.py new file mode 100644 index 000000000..e69de29bb diff --git a/backend/test_crawl/certification_crawler.py b/backend/test_crawl/certification_crawler.py new file mode 100644 index 000000000..e69de29bb diff --git a/backend/test_crawl/insert_to_db.py b/backend/test_crawl/insert_to_db.py new file mode 100644 index 000000000..59a17801c --- /dev/null +++ b/backend/test_crawl/insert_to_db.py @@ -0,0 +1,26 @@ +import os +from dotenv import load_dotenv +from sqlalchemy import create_engine, text +import pandas as pd + + +# 1. ํ™˜๊ฒฝ๋ณ€์ˆ˜ ๋ถˆ๋Ÿฌ์˜ค๊ธฐ +load_dotenv() +DATABASE_URL = os.getenv("DATABASE_URL") +print("๐Ÿ”— DATABASE_URL:", DATABASE_URL) + +# 2. DB ์—ฐ๊ฒฐ +engine = create_engine(DATABASE_URL) + +# 3. ์—ฐ๊ฒฐ ํ…Œ์ŠคํŠธ + ๋ฐ์ดํ„ฐ ์กฐํšŒ +try: + with engine.connect() as conn: + conn.execute(text("SELECT 1;")) # SQLAlchemy 2.0+์—์„œ๋Š” text() ํ•„์š” + print("โœ… DB ์—ฐ๊ฒฐ ์„ฑ๊ณต") + + # ์‹ค์ œ ํ…Œ์ด๋ธ”์—์„œ 5๊ฐœ๋งŒ ์กฐํšŒ + df = pd.read_sql("SELECT * FROM certifications LIMIT 5;", con=engine) + print("๐Ÿ“ฆ certifications ํ…Œ์ด๋ธ” ์ƒ˜ํ”Œ:\n", df) + +except Exception as e: + print("โŒ DB ์—ฐ๊ฒฐ ์‹คํŒจ:", e) \ No newline at end of file diff --git a/backend/test_crawl/recruit_crawler.py b/backend/test_crawl/recruit_crawler.py new file mode 100644 index 000000000..0ae280578 --- /dev/null +++ b/backend/test_crawl/recruit_crawler.py @@ -0,0 +1,173 @@ +from selenium import webdriver +from selenium.webdriver.common.by import By +from selenium.webdriver.chrome.options import Options +from selenium.webdriver.support.ui import WebDriverWait +from selenium.webdriver.support import expected_conditions as EC +import pandas as pd +import time + +def crawl_zighang(job_name="์„œ๋ฒ„ยท๋ฐฑ์—”๋“œ", max_clicks=10): + options = Options() + options.add_argument("--headless") # ํ•„์š” ์‹œ ์ œ๊ฑฐ ๊ฐ€๋Šฅ + options.add_argument("--no-sandbox") + options.add_argument("--disable-dev-shm-usage") + driver = webdriver.Chrome(options=options) + driver.set_window_size(1280, 1024) + wait = WebDriverWait(driver, 20) + + try: + driver.get("https://zighang.com/it") + time.sleep(2) + + arrow_xpath = '//*[@id="root"]/main/div[3]/div/div/div/div/div[2]/div/section/button[2]/div/img' + wait.until(EC.element_to_be_clickable((By.XPATH, arrow_xpath))).click() + time.sleep(1) + + job_button_xpath = f'//button[normalize-space()="{job_name}"]' + wait.until(EC.element_to_be_clickable((By.XPATH, job_button_xpath))).click() + time.sleep(1) + + confirm_button = wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, 'div.sticky.bottom-0 button.bg-primary'))) + driver.execute_script("arguments[0].scrollIntoView(true);", confirm_button) + driver.execute_script("arguments[0].click();", confirm_button) + wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, 'p.ds-web-title2'))) + + original_tab = driver.current_window_handle + results = [] + + for i in range(2, max_clicks + 1): + try: + title_xpath = f'(//p[contains(@class, "ds-web-title2")])[{i}]' + title_elem = wait.until(EC.presence_of_element_located((By.XPATH, title_xpath))) + driver.execute_script("arguments[0].scrollIntoView(true);", title_elem) + driver.execute_script("window.scrollBy(0, -200);") + time.sleep(0.3) + + parent_link = title_elem.find_element(By.XPATH, "./ancestor::a[1]") + driver.execute_script("arguments[0].click();", parent_link) + time.sleep(2) + + new_tab = [tab for tab in driver.window_handles if tab != original_tab][0] + driver.switch_to.window(new_tab) + + data = {} + data["ํšŒ์‚ฌ๋ช…"] = driver.find_element(By.XPATH, '//*[@id="root"]/main/div[2]/div[1]/div[1]/div[1]/div[1]/div/a').text + data["๊ฒฝ๋ ฅ"] = driver.find_element(By.XPATH, '//*[@id="root"]/main/div[2]/div[1]/div[1]/div[1]/div[5]/div/section/div[1]/div/div').text + data["ํ•™๋ ฅ"] = driver.find_element(By.XPATH, '//*[@id="root"]/main/div[2]/div[1]/div[1]/div[1]/div[5]/div/section/div[3]/div/div').text + data["๊ทผ๋ฌด์ง€"] = driver.find_element(By.XPATH, '//*[@id="root"]/main/div[2]/div[1]/div[1]/div[1]/div[5]/div/section/div[2]/div/div').text + data["์ง๊ตฐ"] = job_name + + try: + data["์šฐ๋Œ€์‚ฌํ•ญ"] = driver.find_element(By.XPATH, '//h2[text()="์šฐ๋Œ€์‚ฌํ•ญ"]/following-sibling::p').text + except: + data["์šฐ๋Œ€์‚ฌํ•ญ"] = "" + + try: + data["์ž๊ฒฉ์š”๊ฑด"] = driver.find_element(By.XPATH, '//h2[text()="์ž๊ฒฉ์š”๊ฑด"]/following-sibling::p').text + except: + data["์ž๊ฒฉ์š”๊ฑด"] = "" + + if data["์šฐ๋Œ€์‚ฌํ•ญ"] == "" and data["์ž๊ฒฉ์š”๊ฑด"] == "": + try: + img_elem = driver.find_element(By.XPATH, '//*[@id="root"]/main/div[2]/div[1]/div[1]/div[4]/img') + data["์ด๋ฏธ์ง€๊ฒฝ๋กœ"] = img_elem.get_attribute("src") + except: + data["์ด๋ฏธ์ง€๊ฒฝ๋กœ"] = "" + else: + data["์ด๋ฏธ์ง€๊ฒฝ๋กœ"] = "" + + results.append(data) + driver.close() + driver.switch_to.window(original_tab) + + except Exception as e: + print(f"โŒ [{i}]๋ฒˆ์งธ ๊ณต๊ณ  ์‹คํŒจ: {e}") + continue + + return pd.DataFrame(results) + + finally: + driver.quit() + + + + +def crawl_linkareer(max_pages=5): + chrome_options = Options() + chrome_options.add_experimental_option("detach", True) + chrome_options.add_argument("--no-sandbox") + chrome_options.add_argument("--disable-dev-shm-usage") + # chrome_options.add_argument("--headless") # ํ•„์š” ์‹œ ์ฃผ์„ ์ œ๊ฑฐ + driver = webdriver.Chrome(options=chrome_options) + wait = WebDriverWait(driver, 10) + + results = [] + + try: + for page in range(1, max_pages + 1): + list_url = f"https://linkareer.com/list/recruit?filterBy_activityTypeID=5&filterBy_categoryIDs=58&filterBy_status=OPEN&orderBy_direction=DESC&orderBy_field=RECENT&page={page}" + driver.get(list_url) + time.sleep(2) + print(f"๐Ÿ“„ {page}ํŽ˜์ด์ง€ ์ ‘์† ์™„๋ฃŒ") + + main_window = driver.current_window_handle + row_count = len(driver.find_elements(By.XPATH, '//*[@id="__next"]/div[1]/div/main/div/section/div[2]/table/tbody/tr')) + print(f"๐Ÿ” {row_count}๊ฐœ์˜ ๊ณต๊ณ  ํƒ์ƒ‰ ์˜ˆ์ •") + + for i in range(1, row_count + 1): + try: + link_element = driver.find_element(By.XPATH, f'//*[@id="__next"]/div[1]/div/main/div/section/div[2]/table/tbody/tr[{i}]/td[2]/div/a/div/p') + link_element.click() + + driver.switch_to.window(driver.window_handles[-1]) + + company_name = wait.until(EC.presence_of_element_located( + (By.XPATH, '//*[@id="__next"]/div[1]/div/main/div/div/section[1]/div/article/header/h2'))).text.strip() + company_type = wait.until(EC.presence_of_element_located( + (By.XPATH, '//*[@id="__next"]/div[1]/div/main/div/div/section[1]/div/article/div/dl[1]/dd'))).text.strip() + position_element = wait.until(EC.presence_of_element_located( + (By.XPATH, '//*[@id="__next"]/div[1]/div/main/div/div/section[1]/div/article/div/dl[5]/dd'))) + position = position_element.text.strip() + + p_elements = driver.find_elements(By.XPATH, '//*[@id="DETAIL"]/section[1]/div/p') + p_texts = [p.text.strip() for p in p_elements] + + qual_idx = next((idx for idx, text in enumerate(p_texts) if '์ž๊ฒฉ์š”๊ฑด' in text or '์ž๊ฒฉ ์š”๊ฑด' in text), None) + pos_idx = next((idx for idx, text in enumerate(p_texts) if '๋ชจ์ง‘ ์ง๋ฌด' in text or '์„ธ๋ถ€ ์ง๋ฌด' in text), None) + + qualification_texts = [] + if qual_idx is not None: + for t in p_texts[qual_idx + 1:]: + if any(keyword in t for keyword in ['์ง€์›', 'ํ˜œํƒ', '์šฐ๋Œ€', '๋‹ค์Œ', '๊ทผ๋ฌด']): + break + qualification_texts.append(t) + qualification = "\n".join(qualification_texts) + + detail_position = '' + if pos_idx is not None and pos_idx + 1 < len(p_texts): + detail_position = p_texts[pos_idx + 1] + + results.append({ + 'ํšŒ์‚ฌ๋ช…': company_name, + '๊ธฐ์—…ํ˜•ํƒœ': company_type, + '๋ชจ์ง‘์ง๋ฌด': position, + '์„ธ๋ถ€์ง๋ฌด': detail_position, + '์ž๊ฒฉ์š”๊ฑด': qualification + }) + + print(f"โœ… {company_name} ({i}/{row_count}, page {page}) ํฌ๋กค๋ง ์™„๋ฃŒ") + driver.close() + driver.switch_to.window(main_window) + time.sleep(1) + + except Exception as e: + print(f"โŒ {page}ํŽ˜์ด์ง€ {i}๋ฒˆ์งธ ๊ณต๊ณ  ์˜ค๋ฅ˜: {e}") + if len(driver.window_handles) > 1: + driver.close() + driver.switch_to.window(main_window) + continue + + return pd.DataFrame(results) + + finally: + driver.quit() \ No newline at end of file diff --git a/backend/test_crawl/selenium_basic/app.py b/backend/test_crawl/selenium_basic/app.py new file mode 100644 index 000000000..000847917 --- /dev/null +++ b/backend/test_crawl/selenium_basic/app.py @@ -0,0 +1,19 @@ +import time +from selenium import webdriver +from selenium.webdriver.common.by import By +import chromedriver_autoinstaller + +chromedriver_autoinstaller.install() + +driver = webdriver.Chrome() +# 1. ๋“œ๋ผ์ด๋ฒ„.get() ๋ฉ”์„œ๋“œ๋ฅผ ์‚ฌ์šฉํ•˜์—ฌ ์›น ํŽ˜์ด์ง€๋ฅผ ์—ฐ๋‹ค. +driver.get("https://www.naver.com") +time.sleep(3) + +# 2. ์›น ํŽ˜์ด์ง€๊ฐ€ ์—ด๋ฆฌ๋ฉด, ์›น ํŽ˜์ด์ง€์˜ ์š”์†Œ๋ฅผ ์ฐพ๊ธฐ ์œ„ํ•ด find_element() ๋ฉ”์„œ๋“œ๋ฅผ ์‚ฌ์šฉํ•œ๋‹ค. +css_selector = "#shortcutArea > ul > li:nth-child(8) > a > span.service_name" +group_navigation = driver.find_element(By.CSS_SELECTOR, css_selector) + +print(group_navigation.text) +group_navigation.click() +input() \ No newline at end of file diff --git a/backend/test_crawl/selenium_basic/selenium_tools.py b/backend/test_crawl/selenium_basic/selenium_tools.py new file mode 100644 index 000000000..620b52e59 --- /dev/null +++ b/backend/test_crawl/selenium_basic/selenium_tools.py @@ -0,0 +1,65 @@ +import time +from selenium import webdriver +from selenium.webdriver.common.by import By +from selenium.webrdriver.support.ui import WebDriverWait +from selenium.webdriver.support import expected_conditions as EC + +import chromedriver_autoinstaller + +chromedriver_autoinstaller.install() + +driver = webdriver.Chrome() +driver.get("https://www.naver.com") + +# # 1. Navigation (์›น ํŽ˜์ด์ง€ ์ด๋™) ๊ด€๋ จ ํˆด +# # get, back, forward, refresh ๋ฉ”์„œ๋“œ๋ฅผ ์‚ฌ์šฉํ•˜์—ฌ ์›น ํŽ˜์ด์ง€๋ฅผ ์ด๋™ํ•˜๋Š” ๋ฐฉ๋ฒ•์„ ์•Œ์•„๋ณด์ž. + +# # 1-1. get() ์›ํ•˜๋Š” ํŽ˜์ด์ง€๋กœ ์ด๋™ํ•˜๋Š” ํ•จ์ˆ˜ +# driver.get("https://www.naver.com") +# time.sleep(1) +# driver.get("https://www.google.com") + +# # 1-2. back() ์ด์ „ ํŽ˜์ด์ง€๋กœ ์ด๋™ํ•˜๋Š” ํ•จ์ˆ˜ +# driver.back() +# time.sleep(2) + +# # 1-3. forward() ๋‹ค์Œ ํŽ˜์ด์ง€๋กœ ์ด๋™ํ•˜๋Š” ํ•จ์ˆ˜ +# driver.forward() +# time.sleep(2) + +# # 1-4. refresh() ํ˜„์žฌ ํŽ˜์ด์ง€๋ฅผ ์ƒˆ๋กœ๊ณ ์นจํ•˜๋Š” ํ•จ์ˆ˜ +# driver.refresh() +# time.sleep(2) +# print("๋™์ž‘ ๋") +# input() + +# # 2.browser information +# # 2-1. title ~ ์›น ์‚ฌ์ดํŠธ์˜ ์ œ๋ชฉ์„ ๊ฐ€์ ธ์˜ค๋Š” ํ•จ์ˆ˜ +# title = driver.title +# print("์ œ๋ชฉ:", title) +# # 2-2. current_url ~ ํ˜„์žฌ ์›น ํŽ˜์ด์ง€์˜ URL์„ ๊ฐ€์ ธ์˜ค๋Š” ํ•จ์ˆ˜ +# current_url = driver.current_url +# print("ํ˜„์žฌ URL:", current_url) + +# if "nid.naver.com" in current_url: +# print("์ง€๊ธˆ์€ ๋กœ๊ทธ์ธ ํ•˜๋Š” ๋กœ์ง์ด ํ•„์š”ํ•จ") +# else: +# print("๋„ค์ด๋ฒ„ ๋กœ๊ทธ์ธ ํŽ˜์ด์ง€๊ฐ€ ์•„๋‹™๋‹ˆ๋‹ค.") + + +# 3. Driver Wait (๋“œ๋ผ์ด๋ฒ„ ๋Œ€๊ธฐ) +# 3-1. 3์ดˆ ๋•Œ ๋กœ๋”ฉ์ด ๋๋‚˜์„œ, element๊ฐ€ ์ฐพ์•„์ง. +# 3-2. 30์ดˆ ๊นŒ์ง€๋Š” ๊ธฐ๋‹ค๋ฆฌ๊ฒ ์Œ. +# 3-3. 30์ดˆ๊ฐ€ ๋„˜์–ด๊ฐ€๋ฉด ์—๋Ÿฌ๋˜์ง + +try: + selector = "#shortcutArea > ul > li:nth-child(8) > a > span.service_name" + WebDriverWait(driver, 30).until(EC.presence_of_element_located( + By.CSS_SELECTOR, selector + )) +except: + print("์˜ˆ์™ธ ๋ฐœ์ƒ, ์˜ˆ์™ธ ์ฒ˜๋ฆฌ ์ฝ”๋“œ ์‹คํ–‰ํ•˜๊ธฐ") +print("์—˜๋ฆฌ๋จผํŠธ ๋กœ๋”ฉ ๋") +print("๋‹ค์Œ ์ฝ”๋“œ ์‹คํ–‰") + +input() \ No newline at end of file diff --git a/backend/test_crawl/test.ipynb b/backend/test_crawl/test.ipynb new file mode 100644 index 000000000..d03af762e --- /dev/null +++ b/backend/test_crawl/test.ipynb @@ -0,0 +1,542 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 2, + "id": "c10a33ae", + "metadata": {}, + "outputs": [], + "source": [ + "from recruit_crawler import crawl_linkareer, crawl_zighang" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "85e1eaf8", + "metadata": {}, + "outputs": [], + "source": [ + "df = crawl_zighang(max_clicks=10)" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "cc84c195", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ํšŒ์‚ฌ๋ช…๊ฒฝ๋ ฅํ•™๋ ฅ๊ทผ๋ฌด์ง€์ง๊ตฐ์šฐ๋Œ€์‚ฌํ•ญ์ž๊ฒฉ์š”๊ฑด์ด๋ฏธ์ง€๊ฒฝ๋กœ
0์ฟ ํŒก์‹ ์ž…ํ•™์‚ฌ์„œ์šธ์„œ๋ฒ„ยท๋ฐฑ์—”๋“œhttps://d2juy7qzamcf56.cloudfront.net/2025-06-...
1ํŒ€๋ฆฌ๋ถ€๋œจ3~8๋…„์ฐจํ•™๋ ฅ ๋ฌด๊ด€๊ธฐํƒ€์„œ๋ฒ„ยท๋ฐฑ์—”๋“œ
2ํŒ€๋ฆฌ๋ถ€๋œจ2~8๋…„์ฐจํ•™๋ ฅ ๋ฌด๊ด€์„œ์šธ์„œ๋ฒ„ยท๋ฐฑ์—”๋“œ
3๋„ค์ด๋ฒ„ํด๋ผ์šฐ๋“œ๊ฒฝ๋ ฅ ๋ฌด๊ด€ํ•™์‚ฌ๊ฒฝ๊ธฐ์„œ๋ฒ„ยท๋ฐฑ์—”๋“œhttps://d2juy7qzamcf56.cloudfront.net/2025-06-...
4ํด๋กœ๋ด‡10๋…„์ฐจ ์ด์ƒํ•™๋ ฅ ๋ฌด๊ด€์„œ์šธ์„œ๋ฒ„ยท๋ฐฑ์—”๋“œhttps://d2juy7qzamcf56.cloudfront.net/2025-06-...
5์ฝ”์›จ์ด7๋…„์ฐจ ์ด์ƒํ•™๋ ฅ ๋ฌด๊ด€์„œ์šธ์„œ๋ฒ„ยท๋ฐฑ์—”๋“œhttps://d2juy7qzamcf56.cloudfront.net/2025-06-...
6NHN5๋…„์ฐจ ์ด์ƒํ•™๋ ฅ ๋ฌด๊ด€๊ฒฝ๊ธฐ์„œ๋ฒ„ยท๋ฐฑ์—”๋“œhttps://d2juy7qzamcf56.cloudfront.net/2025-06-...
7๋„ค์ด๋ฒ„ํด๋ผ์šฐ๋“œ3๋…„์ฐจ, 10๋…„์ฐจํ•™๋ ฅ ๋ฌด๊ด€๊ฒฝ๊ธฐ์„œ๋ฒ„ยท๋ฐฑ์—”๋“œhttps://d2juy7qzamcf56.cloudfront.net/2025-06-...
8์•กํ‹ฐ๋ถ€ํ‚ค3~10๋…„์ฐจํ•™๋ ฅ ๋ฌด๊ด€์ „๋ถ์„œ๋ฒ„ยท๋ฐฑ์—”๋“œโ€ข ์ง€๋„ ๊ด€๋ จ ๊ฐœ๋ฐœ ๊ฒฝํ—˜์ž\\nโ€ข ๊ฒฐ์ œ ๊ด€๋ จ ๊ฐœ๋ฐœ ๊ฒฝํ—˜์ž\\nโ€ข B2C ํ”Œ๋žซํผ ๊ฐœ๋ฐœ ๊ฒฝ...โ€ข ๊ฐœ๋ฐœ 3๋…„ ์ด์ƒ ๊ฒฝ๋ ฅ์˜ ์ค‘๊ธ‰๊ฐœ๋ฐœ์ž\\nโ€ข IntelliJ ๊ฐ€๋Šฅ\\nโ€ข Spring ...
\n", + "
" + ], + "text/plain": [ + " ํšŒ์‚ฌ๋ช… ๊ฒฝ๋ ฅ ํ•™๋ ฅ ๊ทผ๋ฌด์ง€ ์ง๊ตฐ \\\n", + "0 ์ฟ ํŒก ์‹ ์ž… ํ•™์‚ฌ ์„œ์šธ ์„œ๋ฒ„ยท๋ฐฑ์—”๋“œ \n", + "1 ํŒ€๋ฆฌ๋ถ€๋œจ 3~8๋…„์ฐจ ํ•™๋ ฅ ๋ฌด๊ด€ ๊ธฐํƒ€ ์„œ๋ฒ„ยท๋ฐฑ์—”๋“œ \n", + "2 ํŒ€๋ฆฌ๋ถ€๋œจ 2~8๋…„์ฐจ ํ•™๋ ฅ ๋ฌด๊ด€ ์„œ์šธ ์„œ๋ฒ„ยท๋ฐฑ์—”๋“œ \n", + "3 ๋„ค์ด๋ฒ„ํด๋ผ์šฐ๋“œ ๊ฒฝ๋ ฅ ๋ฌด๊ด€ ํ•™์‚ฌ ๊ฒฝ๊ธฐ ์„œ๋ฒ„ยท๋ฐฑ์—”๋“œ \n", + "4 ํด๋กœ๋ด‡ 10๋…„์ฐจ ์ด์ƒ ํ•™๋ ฅ ๋ฌด๊ด€ ์„œ์šธ ์„œ๋ฒ„ยท๋ฐฑ์—”๋“œ \n", + "5 ์ฝ”์›จ์ด 7๋…„์ฐจ ์ด์ƒ ํ•™๋ ฅ ๋ฌด๊ด€ ์„œ์šธ ์„œ๋ฒ„ยท๋ฐฑ์—”๋“œ \n", + "6 NHN 5๋…„์ฐจ ์ด์ƒ ํ•™๋ ฅ ๋ฌด๊ด€ ๊ฒฝ๊ธฐ ์„œ๋ฒ„ยท๋ฐฑ์—”๋“œ \n", + "7 ๋„ค์ด๋ฒ„ํด๋ผ์šฐ๋“œ 3๋…„์ฐจ, 10๋…„์ฐจ ํ•™๋ ฅ ๋ฌด๊ด€ ๊ฒฝ๊ธฐ ์„œ๋ฒ„ยท๋ฐฑ์—”๋“œ \n", + "8 ์•กํ‹ฐ๋ถ€ํ‚ค 3~10๋…„์ฐจ ํ•™๋ ฅ ๋ฌด๊ด€ ์ „๋ถ ์„œ๋ฒ„ยท๋ฐฑ์—”๋“œ \n", + "\n", + " ์šฐ๋Œ€์‚ฌํ•ญ \\\n", + "0 \n", + "1 \n", + "2 \n", + "3 \n", + "4 \n", + "5 \n", + "6 \n", + "7 \n", + "8 โ€ข ์ง€๋„ ๊ด€๋ จ ๊ฐœ๋ฐœ ๊ฒฝํ—˜์ž\\nโ€ข ๊ฒฐ์ œ ๊ด€๋ จ ๊ฐœ๋ฐœ ๊ฒฝํ—˜์ž\\nโ€ข B2C ํ”Œ๋žซํผ ๊ฐœ๋ฐœ ๊ฒฝ... \n", + "\n", + " ์ž๊ฒฉ์š”๊ฑด \\\n", + "0 \n", + "1 \n", + "2 \n", + "3 \n", + "4 \n", + "5 \n", + "6 \n", + "7 \n", + "8 โ€ข ๊ฐœ๋ฐœ 3๋…„ ์ด์ƒ ๊ฒฝ๋ ฅ์˜ ์ค‘๊ธ‰๊ฐœ๋ฐœ์ž\\nโ€ข IntelliJ ๊ฐ€๋Šฅ\\nโ€ข Spring ... \n", + "\n", + " ์ด๋ฏธ์ง€๊ฒฝ๋กœ \n", + "0 https://d2juy7qzamcf56.cloudfront.net/2025-06-... \n", + "1 \n", + "2 \n", + "3 https://d2juy7qzamcf56.cloudfront.net/2025-06-... \n", + "4 https://d2juy7qzamcf56.cloudfront.net/2025-06-... \n", + "5 https://d2juy7qzamcf56.cloudfront.net/2025-06-... \n", + "6 https://d2juy7qzamcf56.cloudfront.net/2025-06-... \n", + "7 https://d2juy7qzamcf56.cloudfront.net/2025-06-... \n", + "8 " + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "44657c9c", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "๐Ÿ“„ 1ํŽ˜์ด์ง€ ์ ‘์† ์™„๋ฃŒ\n", + "๐Ÿ” 20๊ฐœ์˜ ๊ณต๊ณ  ํƒ์ƒ‰ ์˜ˆ์ •\n", + "โœ… ์ฃผ์‹ํšŒ์‚ฌ ๋งˆ์นด๋กฑํŒฉํ† ๋ฆฌ (1/20, page 1) ํฌ๋กค๋ง ์™„๋ฃŒ\n", + "โœ… Deloitte ์•ˆ์ง„ํšŒ๊ณ„๋ฒ•์ธ (2/20, page 1) ํฌ๋กค๋ง ์™„๋ฃŒ\n", + "โœ… SK์— ์•ค์„œ๋น„์Šค (3/20, page 1) ํฌ๋กค๋ง ์™„๋ฃŒ\n", + "โœ… ์Šคํƒ€์‰ฝ์—”ํ„ฐํ…Œ์ธ๋จผํŠธ (4/20, page 1) ํฌ๋กค๋ง ์™„๋ฃŒ\n", + "โœ… ์ผ๋™์ œ์•ฝ (5/20, page 1) ํฌ๋กค๋ง ์™„๋ฃŒ\n", + "โœ… ํœด๋น„์ธ  (6/20, page 1) ํฌ๋กค๋ง ์™„๋ฃŒ\n", + "โœ… ํ•ํŽซ (7/20, page 1) ํฌ๋กค๋ง ์™„๋ฃŒ\n", + "โœ… ํŽ„์–ด๋น„์Šค (8/20, page 1) ํฌ๋กค๋ง ์™„๋ฃŒ\n", + "โœ… ์Ž„๋ฏน์Šค (9/20, page 1) ํฌ๋กค๋ง ์™„๋ฃŒ\n", + "โœ… ์ฃผ์‹ํšŒ์‚ฌ ํผํฌ์ฆˆ๋“œ (10/20, page 1) ํฌ๋กค๋ง ์™„๋ฃŒ\n", + "โœ… SM์—”ํ„ฐํ…Œ์ธ๋จผํŠธ (11/20, page 1) ํฌ๋กค๋ง ์™„๋ฃŒ\n", + "โœ… ํ•œ๊ตญ์‚ฌํšŒ๋ณด์žฅ์ •๋ณด์› (12/20, page 1) ํฌ๋กค๋ง ์™„๋ฃŒ\n", + "โœ… ์•Œ์„ธ๋ฏธ (13/20, page 1) ํฌ๋กค๋ง ์™„๋ฃŒ\n", + "โœ… ์ปดํˆฌ์Šค (14/20, page 1) ํฌ๋กค๋ง ์™„๋ฃŒ\n", + "โœ… ์ปดํˆฌ์Šค (15/20, page 1) ํฌ๋กค๋ง ์™„๋ฃŒ\n", + "โœ… SNOW (16/20, page 1) ํฌ๋กค๋ง ์™„๋ฃŒ\n", + "โœ… ์ปดํˆฌ์Šค (17/20, page 1) ํฌ๋กค๋ง ์™„๋ฃŒ\n", + "โœ… Deloitte ์•ˆ์ง„ํšŒ๊ณ„๋ฒ•์ธ (18/20, page 1) ํฌ๋กค๋ง ์™„๋ฃŒ\n", + "โœ… ํ™”์Šน์ฝ”ํผ๋ ˆ์ด์…˜ (19/20, page 1) ํฌ๋กค๋ง ์™„๋ฃŒ\n", + "โœ… ํ…Œ์Šฌ๋ผ์ฝ”๋ฆฌ์•„ (20/20, page 1) ํฌ๋กค๋ง ์™„๋ฃŒ\n" + ] + } + ], + "source": [ + "df2 = crawl_linkareer(max_pages=1)" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "d30ce1fd", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ํšŒ์‚ฌ๋ช…๊ธฐ์—…ํ˜•ํƒœ๋ชจ์ง‘์ง๋ฌด์„ธ๋ถ€์ง๋ฌด์ž๊ฒฉ์š”๊ฑด
0์ฃผ์‹ํšŒ์‚ฌ ๋งˆ์นด๋กฑํŒฉํ† ๋ฆฌ์Šคํƒ€ํŠธ์—…๊ฒฝ๊ธฐ ์„ฑ๋‚จ์‹œ ๋ถ„๋‹น๊ตฌ
1Deloitte ์•ˆ์ง„ํšŒ๊ณ„๋ฒ•์ธ์™ธ๊ตญ๊ณ„๊ธฐ์—…์„œ์šธ ์˜๋“ฑํฌ๊ตฌ
2SK์— ์•ค์„œ๋น„์Šค๋Œ€๊ธฐ์—…์„œ์šธ ์ค‘๊ตฌ
3์Šคํƒ€์‰ฝ์—”ํ„ฐํ…Œ์ธ๋จผํŠธ์ค‘์†Œ๊ธฐ์—…์„œ์šธ ๊ฐ•๋‚จ๊ตฌ์˜จ๋ผ์ธ ์‡ผํ•‘๋ชฐ ์šด์˜ ๋ฐ CS ๋‹ด๋‹น์žํ•™๋ ฅ : ์ดˆ๋Œ€์กธ ์ด์ƒ\\n๊ฒฝ๋ ฅ : ๋ฌด๊ด€(์‹ ์ž…/๊ฒฝ๋ ฅ)\\n์—”ํ„ฐํ…Œ์ธ๋จผํŠธ ๋ฐ ๋ฌธํ™” ์‚ฌ์—…์— ๊ด€...
4์ผ๋™์ œ์•ฝ์ค‘๊ฒฌ๊ธฐ์—…์„œ์šธ ์„œ์ดˆ๊ตฌ
5ํœด๋น„์ธ ์ค‘์†Œ๊ธฐ์—…๊ฒฝ๊ธฐ ์•ˆ์–‘์‹œ ๋™์•ˆ๊ตฌ- ๋Œ€์กธ ์ด์ƒ(์ „์ž ๊ด€๋ จ ์ „๊ณต)\\n- ๋น„์ฆˆ๋‹ˆ์Šค ์˜์–ด ํšŒํ™” ๊ฐ€๋Šฅ์ž(TOEIC 700์ ...
6ํ•ํŽซ์ค‘์†Œ๊ธฐ์—…์„œ์šธ ๊ฐ•๋‚จ๊ตฌ
7ํŽ„์–ด๋น„์Šค์ค‘๊ฒฌ๊ธฐ์—…IT/์ธํ„ฐ๋„ทQA BeginnerMMORPG ํ”Œ๋ ˆ์ด ๊ฒฝํ—˜์ด ๋งŽ์œผ์‹  ๋ถ„\\n์›ํ™œํ•œ ์ปค๋ฎค๋‹ˆ์ผ€์ด์…˜์ด ๊ฐ€๋Šฅํ•˜์‹  ๋ถ„\\n\\n[์ „...
8์Ž„๋ฏน์Šค์ค‘์†Œ๊ธฐ์—…๊ฒฝ๊ธฐ ์„ฑ๋‚จ์‹œ ๋ถ„๋‹น๊ตฌ- ์‹ ์ž… / ๊ฒฝ๋ ฅ 1๋…„ ์ด์ƒ 5๋…„ ์ดํ•˜\\n- ์ดˆ๋Œ€์กธ ์ด์ƒ\\n\\n[ํ•ฉ๋ฅ˜์—ฌ์ •]\\n- ์„œ...
9์ฃผ์‹ํšŒ์‚ฌ ํผํฌ์ฆˆ๋“œ์ค‘์†Œ๊ธฐ์—…์„œ์šธ ๊ฐ•๋‚จ๊ตฌ์ฝ˜ํ…์ธ  ๋งˆ์ผ€ํ„ฐ๋””์ž์ธ ๋ฐ ์˜์ƒ ์ œ์ž‘ ํˆด ์‚ฌ์šฉ ๊ฒฝํ—˜ ํฌํ† ์ƒต, ์–ด๋„๋น„ ํ”„๋ฆฌ๋ฏธ์–ด, ์–ด๋„๋น„ ์ผ๋Ÿฌ์ŠคํŠธ๋ ˆ์ดํ„ฐ...
10SM์—”ํ„ฐํ…Œ์ธ๋จผํŠธ๋Œ€๊ธฐ์—…์„œ์šธ ์„ฑ๋™๊ตฌ
11ํ•œ๊ตญ์‚ฌํšŒ๋ณด์žฅ์ •๋ณด์›๊ณต๊ณต๊ธฐ๊ด€/๊ณต๊ธฐ์—…์„œ์šธ ๊ด‘์ง„๊ตฌ
12์•Œ์„ธ๋ฏธ์ค‘์†Œ๊ธฐ์—…์„œ์šธ ๊ฐ•๋‚จ๊ตฌSoftware Engineerโ€ข ์ปดํ“จํ„ฐ ๊ณผํ•™ ๋˜๋Š” ๊ด€๋ จ ๊ณตํ•™ ํ•™์‚ฌ ๋˜๋Š” ์ด์™€ ๋™๋“ฑํ•œ ๊ฒฝํ—˜\\nโ€ข ๊ธฐ๋ณธ์ ์ธ ์ž๋ฐ”(J...
13์ปดํˆฌ์Šค์ค‘๊ฒฌ๊ธฐ์—…์„œ์šธ ๊ธˆ์ฒœ๊ตฌ
14์ปดํˆฌ์Šค์ค‘๊ฒฌ๊ธฐ์—…์„œ์šธ ๊ธˆ์ฒœ๊ตฌ
15SNOW๋Œ€๊ธฐ์—…๊ฒฝ๊ธฐ ์„ฑ๋‚จ์‹œ ๋ถ„๋‹น๊ตฌ
16์ปดํˆฌ์Šค์ค‘๊ฒฌ๊ธฐ์—…์„œ์šธ ๊ธˆ์ฒœ๊ตฌ
17Deloitte ์•ˆ์ง„ํšŒ๊ณ„๋ฒ•์ธ์ค‘๊ฒฌ๊ธฐ์—…์„œ์šธ ์˜๋“ฑํฌ๊ตฌ
18ํ™”์Šน์ฝ”ํผ๋ ˆ์ด์…˜๋Œ€๊ธฐ์—…ํ•ด์™ธ, ์„œ์šธ ์˜๋“ฑํฌ๊ตฌ, ๊ฒฝ๋‚จ ์–‘์‚ฐ์‹œ, ๋ถ€์‚ฐ ๊ธฐ์žฅ๊ตฐ, ๋ถ€์‚ฐ ์—ฐ์ œ๊ตฌํ™”์Šน์ฝ”ํผ๋ ˆ์ด์…˜_์ž๊ธˆ- ํ•™์‚ฌ ์ด์ƒ
19ํ…Œ์Šฌ๋ผ์ฝ”๋ฆฌ์•„์™ธ๊ตญ๊ณ„๊ธฐ์—…์„œ์šธ ๊ฐ•๋‚จ๊ตฌ
\n", + "
" + ], + "text/plain": [ + " ํšŒ์‚ฌ๋ช… ๊ธฐ์—…ํ˜•ํƒœ ๋ชจ์ง‘์ง๋ฌด \\\n", + "0 ์ฃผ์‹ํšŒ์‚ฌ ๋งˆ์นด๋กฑํŒฉํ† ๋ฆฌ ์Šคํƒ€ํŠธ์—… ๊ฒฝ๊ธฐ ์„ฑ๋‚จ์‹œ ๋ถ„๋‹น๊ตฌ \n", + "1 Deloitte ์•ˆ์ง„ํšŒ๊ณ„๋ฒ•์ธ ์™ธ๊ตญ๊ณ„๊ธฐ์—… ์„œ์šธ ์˜๋“ฑํฌ๊ตฌ \n", + "2 SK์— ์•ค์„œ๋น„์Šค ๋Œ€๊ธฐ์—… ์„œ์šธ ์ค‘๊ตฌ \n", + "3 ์Šคํƒ€์‰ฝ์—”ํ„ฐํ…Œ์ธ๋จผํŠธ ์ค‘์†Œ๊ธฐ์—… ์„œ์šธ ๊ฐ•๋‚จ๊ตฌ \n", + "4 ์ผ๋™์ œ์•ฝ ์ค‘๊ฒฌ๊ธฐ์—… ์„œ์šธ ์„œ์ดˆ๊ตฌ \n", + "5 ํœด๋น„์ธ  ์ค‘์†Œ๊ธฐ์—… ๊ฒฝ๊ธฐ ์•ˆ์–‘์‹œ ๋™์•ˆ๊ตฌ \n", + "6 ํ•ํŽซ ์ค‘์†Œ๊ธฐ์—… ์„œ์šธ ๊ฐ•๋‚จ๊ตฌ \n", + "7 ํŽ„์–ด๋น„์Šค ์ค‘๊ฒฌ๊ธฐ์—… IT/์ธํ„ฐ๋„ท \n", + "8 ์Ž„๋ฏน์Šค ์ค‘์†Œ๊ธฐ์—… ๊ฒฝ๊ธฐ ์„ฑ๋‚จ์‹œ ๋ถ„๋‹น๊ตฌ \n", + "9 ์ฃผ์‹ํšŒ์‚ฌ ํผํฌ์ฆˆ๋“œ ์ค‘์†Œ๊ธฐ์—… ์„œ์šธ ๊ฐ•๋‚จ๊ตฌ \n", + "10 SM์—”ํ„ฐํ…Œ์ธ๋จผํŠธ ๋Œ€๊ธฐ์—… ์„œ์šธ ์„ฑ๋™๊ตฌ \n", + "11 ํ•œ๊ตญ์‚ฌํšŒ๋ณด์žฅ์ •๋ณด์› ๊ณต๊ณต๊ธฐ๊ด€/๊ณต๊ธฐ์—… ์„œ์šธ ๊ด‘์ง„๊ตฌ \n", + "12 ์•Œ์„ธ๋ฏธ ์ค‘์†Œ๊ธฐ์—… ์„œ์šธ ๊ฐ•๋‚จ๊ตฌ \n", + "13 ์ปดํˆฌ์Šค ์ค‘๊ฒฌ๊ธฐ์—… ์„œ์šธ ๊ธˆ์ฒœ๊ตฌ \n", + "14 ์ปดํˆฌ์Šค ์ค‘๊ฒฌ๊ธฐ์—… ์„œ์šธ ๊ธˆ์ฒœ๊ตฌ \n", + "15 SNOW ๋Œ€๊ธฐ์—… ๊ฒฝ๊ธฐ ์„ฑ๋‚จ์‹œ ๋ถ„๋‹น๊ตฌ \n", + "16 ์ปดํˆฌ์Šค ์ค‘๊ฒฌ๊ธฐ์—… ์„œ์šธ ๊ธˆ์ฒœ๊ตฌ \n", + "17 Deloitte ์•ˆ์ง„ํšŒ๊ณ„๋ฒ•์ธ ์ค‘๊ฒฌ๊ธฐ์—… ์„œ์šธ ์˜๋“ฑํฌ๊ตฌ \n", + "18 ํ™”์Šน์ฝ”ํผ๋ ˆ์ด์…˜ ๋Œ€๊ธฐ์—… ํ•ด์™ธ, ์„œ์šธ ์˜๋“ฑํฌ๊ตฌ, ๊ฒฝ๋‚จ ์–‘์‚ฐ์‹œ, ๋ถ€์‚ฐ ๊ธฐ์žฅ๊ตฐ, ๋ถ€์‚ฐ ์—ฐ์ œ๊ตฌ \n", + "19 ํ…Œ์Šฌ๋ผ์ฝ”๋ฆฌ์•„ ์™ธ๊ตญ๊ณ„๊ธฐ์—… ์„œ์šธ ๊ฐ•๋‚จ๊ตฌ \n", + "\n", + " ์„ธ๋ถ€์ง๋ฌด ์ž๊ฒฉ์š”๊ฑด \n", + "0 \n", + "1 \n", + "2 \n", + "3 ์˜จ๋ผ์ธ ์‡ผํ•‘๋ชฐ ์šด์˜ ๋ฐ CS ๋‹ด๋‹น์ž ํ•™๋ ฅ : ์ดˆ๋Œ€์กธ ์ด์ƒ\\n๊ฒฝ๋ ฅ : ๋ฌด๊ด€(์‹ ์ž…/๊ฒฝ๋ ฅ)\\n์—”ํ„ฐํ…Œ์ธ๋จผํŠธ ๋ฐ ๋ฌธํ™” ์‚ฌ์—…์— ๊ด€... \n", + "4 \n", + "5 - ๋Œ€์กธ ์ด์ƒ(์ „์ž ๊ด€๋ จ ์ „๊ณต)\\n- ๋น„์ฆˆ๋‹ˆ์Šค ์˜์–ด ํšŒํ™” ๊ฐ€๋Šฅ์ž(TOEIC 700์ ... \n", + "6 \n", + "7 QA Beginner MMORPG ํ”Œ๋ ˆ์ด ๊ฒฝํ—˜์ด ๋งŽ์œผ์‹  ๋ถ„\\n์›ํ™œํ•œ ์ปค๋ฎค๋‹ˆ์ผ€์ด์…˜์ด ๊ฐ€๋Šฅํ•˜์‹  ๋ถ„\\n\\n[์ „... \n", + "8 - ์‹ ์ž… / ๊ฒฝ๋ ฅ 1๋…„ ์ด์ƒ 5๋…„ ์ดํ•˜\\n- ์ดˆ๋Œ€์กธ ์ด์ƒ\\n\\n[ํ•ฉ๋ฅ˜์—ฌ์ •]\\n- ์„œ... \n", + "9 ์ฝ˜ํ…์ธ  ๋งˆ์ผ€ํ„ฐ ๋””์ž์ธ ๋ฐ ์˜์ƒ ์ œ์ž‘ ํˆด ์‚ฌ์šฉ ๊ฒฝํ—˜ ํฌํ† ์ƒต, ์–ด๋„๋น„ ํ”„๋ฆฌ๋ฏธ์–ด, ์–ด๋„๋น„ ์ผ๋Ÿฌ์ŠคํŠธ๋ ˆ์ดํ„ฐ... \n", + "10 \n", + "11 \n", + "12 Software Engineer โ€ข ์ปดํ“จํ„ฐ ๊ณผํ•™ ๋˜๋Š” ๊ด€๋ จ ๊ณตํ•™ ํ•™์‚ฌ ๋˜๋Š” ์ด์™€ ๋™๋“ฑํ•œ ๊ฒฝํ—˜\\nโ€ข ๊ธฐ๋ณธ์ ์ธ ์ž๋ฐ”(J... \n", + "13 \n", + "14 \n", + "15 \n", + "16 \n", + "17 \n", + "18 ํ™”์Šน์ฝ”ํผ๋ ˆ์ด์…˜_์ž๊ธˆ - ํ•™์‚ฌ ์ด์ƒ \n", + "19 " + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "405b7b7f", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "python313", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.13.2" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}