remotemono
IPCVector_Impl.h
1 /*
2  Copyright 2020 David "Alemarius Nexus" Lerch
3 
4  This file is part of RemoteMono.
5 
6  RemoteMono is free software: you can redistribute it and/or modify
7  it under the terms of the GNU Lesser General Public License as published
8  by the Free Software Foundation, either version 3 of the License, or
9  (at your option) any later version.
10 
11  RemoteMono is distributed in the hope that it will be useful,
12  but WITHOUT ANY WARRANTY; without even the implied warranty of
13  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  GNU Lesser General Public License for more details.
15 
16  You should have received a copy of the GNU Lesser General Public License
17  along with RemoteMono. If not, see <https://www.gnu.org/licenses/>.
18  */
19 
20 #pragma once
21 
22 #include "config.h"
23 
24 #include "IPCVector.h"
25 
26 #include <cstring>
27 #include "util.h"
28 #include "log.h"
29 
30 
31 using namespace blackbone;
32 
33 
34 
35 
36 namespace remotemono
37 {
38 
39 
40 template <typename ElemT, typename IntPtrT>
41 IPCVector<ElemT, IntPtrT>::IPCVector()
42  : process(nullptr), injected(false), remAPI(nullptr), code(nullptr)
43 {
44 }
45 
46 
47 template <typename ElemT, typename IntPtrT>
48 IPCVector<ElemT, IntPtrT>::~IPCVector()
49 {
50  uninject();
51 }
52 
53 
54 template <typename ElemT, typename IntPtrT>
55 typename IPCVector<ElemT, IntPtrT>::VectorPtr IPCVector<ElemT, IntPtrT>::vectorNew(uint32_t cap)
56 {
57  if (process) {
58  return *remAPI->vectorNew.Call({cap}, process->remote().getWorker());
59  } else {
60  return localApi.vectorNew(cap);
61  }
62 }
63 
64 
65 template <typename ElemT, typename IntPtrT>
66 void IPCVector<ElemT, IntPtrT>::vectorFree(VectorPtr v)
67 {
68  if (process) {
69  remAPI->vectorFree.Call({v}, process->remote().getWorker());
70  } else {
71  localApi.vectorFree(v);
72  }
73 }
74 
75 
76 template <typename ElemT, typename IntPtrT>
77 void IPCVector<ElemT, IntPtrT>::vectorAdd(VectorPtr v, ElemT elem)
78 {
79  if (process) {
80  remAPI->vectorAdd.Call({v, elem}, process->remote().getWorker());
81  } else {
82  localApi.vectorAdd(v, elem);
83  }
84 }
85 
86 
87 template <typename ElemT, typename IntPtrT>
88 void IPCVector<ElemT, IntPtrT>::vectorClear(VectorPtr v)
89 {
90  if (process) {
91  remAPI->vectorClear.Call({v}, process->remote().getWorker());
92  } else {
93  localApi.vectorClear(v);
94  }
95 }
96 
97 
98 template <typename ElemT, typename IntPtrT>
99 uint32_t IPCVector<ElemT, IntPtrT>::vectorLength(VectorPtr v)
100 {
101  if (process) {
102  return *remAPI->vectorLength.Call({v}, process->remote().getWorker());
103  } else {
104  return localApi.vectorLength(v);
105  }
106 }
107 
108 
109 template <typename ElemT, typename IntPtrT>
110 uint32_t IPCVector<ElemT, IntPtrT>::vectorCapacity(VectorPtr v)
111 {
112  if (process) {
113  return *remAPI->vectorCapacity.Call({v}, process->remote().getWorker());
114  } else {
115  return localApi.vectorCapacity(v);
116  }
117 }
118 
119 
120 template <typename ElemT, typename IntPtrT>
121 typename IPCVector<ElemT, IntPtrT>::DataPtr IPCVector<ElemT, IntPtrT>::vectorData(VectorPtr v)
122 {
123  if (process) {
124  return *remAPI->vectorData.Call({v}, process->remote().getWorker());
125  } else {
126  return localApi.vectorData(v);
127  }
128 }
129 
130 
131 template <typename ElemT, typename IntPtrT>
132 void IPCVector<ElemT, IntPtrT>::vectorGrow(VectorPtr v, uint32_t cap)
133 {
134  if (process) {
135  remAPI->vectorGrow.Call({v, cap}, process->remote().getWorker());
136  } else {
137  localApi.vectorGrow(v, cap);
138  }
139 }
140 
141 
142 template <typename ElemT, typename IntPtrT>
143 typename IPCVector<ElemT, IntPtrT>::VectorPtr IPCVector<ElemT, IntPtrT>::create(const std::vector<ElemT>& data)
144 {
145  VectorPtr v = vectorNew((uint32_t) data.size());
146  for (const ElemT& e : data) {
147  vectorAdd(v, e);
148  }
149  return v;
150 }
151 
152 
153 template <typename ElemT, typename IntPtrT>
154 void IPCVector<ElemT, IntPtrT>::read(VectorPtr v, std::vector<ElemT>& out)
155 {
156  uint32_t len = vectorLength(v);
157  out.resize(len);
158 
159  if (process) {
160  ProcessMemory& mem = process->memory();
161  mem.Read((ptr_t) vectorData(v), len*sizeof(ElemT), out.data());
162  } else {
163  memcpy(out.data(), (void*) (uintptr_t) vectorData(v), len*sizeof(ElemT));
164  }
165 }
166 
167 
168 template <typename ElemT, typename IntPtrT>
169 void IPCVector<ElemT, IntPtrT>::inject(Process* process)
170 {
171  using namespace asmjit;
172  using namespace asmjit::host;
173 
174 
175  if (injected) {
176  return;
177  }
178 
179  this->process = process;
180 
181  ProcessModules* modules = process ? &process->modules() : nullptr;
182  ProcessMemory* mem = process ? &process->memory() : nullptr;
183 
184  bool x64 = (sizeof(IntPtrT) == 8);
185 
186  RMonoLogVerbose("Assembling IPCVector functions for %s", x64 ? "x64" : "x86");
187 
188  auto asmPtr = AsmFactory::GetAssembler(!x64);
189  auto& a = *asmPtr;
190 
191 
192  Label lVectorGrow = a->newLabel();
193  Label lVectorNew = a->newLabel();
194  Label lVectorFree = a->newLabel();
195  Label lVectorAdd = a->newLabel();
196  Label lVectorClear = a->newLabel();
197  Label lVectorLength = a->newLabel();
198  Label lVectorCapacity = a->newLabel();
199  Label lVectorData = a->newLabel();
200 
201  ptr_t pHeapAlloc;
202  ptr_t pHeapReAlloc;
203  ptr_t pHeapFree;
204  ptr_t pGetProcessHeap;
205 
206  if (process) {
207  auto k32 = modules->GetModule(L"kernel32.dll");
208 
209  pHeapAlloc = modules->GetExport(k32, "HeapAlloc")->procAddress;
210  pHeapReAlloc = modules->GetExport(k32, "HeapReAlloc")->procAddress;
211  pHeapFree = modules->GetExport(k32, "HeapFree")->procAddress;
212  pGetProcessHeap = modules->GetExport(k32, "GetProcessHeap")->procAddress;
213  } else {
214  pHeapAlloc = (ptr_t) &HeapAlloc;
215  pHeapReAlloc = (ptr_t) &HeapReAlloc;
216  pHeapFree = (ptr_t) &HeapFree;
217  pGetProcessHeap = (ptr_t) &GetProcessHeap;
218  }
219 
220 
221  // IMPORTANT: Make sure that each function's prolog aligns the stack to 16 bytes on x64 if it calls other functions.
222  // It's off by 8 bytes at prolog start because of the return address pushed by `call`.
223 
224  // TODO: In 64-bit mode, do we actually need to call these WIN32 API functions using x64 calling conventions, or are
225  // they still using 32-bit stdcall?
226 
227 
228  // __fastcall void VectorGrow(VectorPtr v, uint32_t cap);
229  {
230  Label lVectorGrowRet = a->newLabel();
231  Label lVectorGrowPow2Loop = a->newLabel();
232  Label lVectorGrowPow2LoopEnd = a->newLabel();
233 
234  a->bind(lVectorGrow);
235  a->push(a->zbx);
236  a->push(a->zsi);
237  a->push(a->zdi);
238  a->mov(a->zbx, a->zcx);
239  a->mov(a->zsi, a->zdx);
240 
241  // if (cap <= v->cap)
242  // return;
243  a->sub(edx, ptr(a->zbx, offsetof(Vector, cap)));
244  a->jbe(lVectorGrowRet);
245 
246  // v->cap = 16;
247  a->mov(a->zcx, 16);
248 
249  // while (v->cap < cap) {
250  // v->cap <<= 1;
251  // }
252  a->bind(lVectorGrowPow2Loop);
253  a->mov(a->zdx, a->zcx);
254  a->sub(a->zdx, a->zsi);
255  a->jae(lVectorGrowPow2LoopEnd);
256  a->shl(a->zcx, 1);
257  a->jmp(lVectorGrowPow2Loop);
258  a->bind(lVectorGrowPow2LoopEnd);
259  a->mov(a->zsi, a->zcx);
260  a->mov(ptr(a->zbx, offsetof(Vector, cap)), ecx);
261 
262  // HANDLE heap = GetProcessHeap();
263  if (x64) {
264  a->mov(a->zax, pGetProcessHeap);
265  a->sub(a->zsp, 32);
266  a->call(a->zax);
267  a->add(a->zsp, 32);
268  } else {
269  a->mov(a->zax, pGetProcessHeap);
270  a->call(a->zax);
271  }
272  a->mov(a->zdi, a->zax);
273 
274  // v->data = (DataPtr) HeapReAlloc(heap, 0, v->data, v->cap*sizeof(ElemT));
275  a->shl(a->zsi, static_ilog2(sizeof(ElemT)));
276  a.GenCall(pHeapReAlloc, {a->zdi, 0, ptr(a->zbx, offsetof(Vector, data), a->zbx.getSize()), a->zsi}, cc_stdcall);
277  a->mov(ptr(a->zbx, offsetof(Vector, data)), a->zax);
278 
279  a->bind(lVectorGrowRet);
280  a->pop(a->zdi);
281  a->pop(a->zsi);
282  a->pop(a->zbx);
283  a->ret();
284  }
285 
286 
287  // __fastcall VectorPtr VectorNew(uint32_t cap);
288  {
289  a->bind(lVectorNew);
290  a->push(a->zbx);
291  a->push(a->zsi);
292  a->push(a->zdi);
293  a->mov(a->zdi, a->zcx);
294 
295  // HANDLE heap = GetProcessHeap();
296  if (x64) {
297  a->mov(a->zax, pGetProcessHeap);
298  a->sub(a->zsp, 32);
299  a->call(a->zax);
300  a->add(a->zsp, 32);
301  } else {
302  a->mov(a->zax, pGetProcessHeap);
303  a->call(a->zax);
304  }
305  a->mov(a->zsi, a->zax);
306 
307  // VectorPtr v = (VectorPtr) HeapAlloc(heap, 0, sizeof(Vector));
308  a.GenCall(pHeapAlloc, {a->zsi, 0, sizeof(Vector)}, cc_stdcall);
309  a->mov(a->zbx, a->zax);
310 
311  // v->len = 0;
312  // v->cap = cap;
313  a->xor_(ecx, ecx);
314  a->mov(ptr(a->zbx, offsetof(Vector, len)), ecx);
315  a->mov(ptr(a->zbx, offsetof(Vector, cap)), edi);
316 
317  // v->data = (DataPtr) HeapAlloc(heap, 0, cap * sizeof(ElemT));
318  a->shl(a->zdi, static_ilog2(sizeof(ElemT)));
319  a.GenCall(pHeapAlloc, {a->zsi, 0, a->zdi}, cc_stdcall);
320  a->mov(ptr(a->zbx, offsetof(Vector, data)), a->zax);
321 
322  // return v;
323  a->mov(a->zax, a->zbx);
324  a->pop(a->zdi);
325  a->pop(a->zsi);
326  a->pop(a->zbx);
327  a->ret();
328  }
329 
330 
331  // __fastcall void VectorFree(VectorPtr v);
332  {
333  a->bind(lVectorFree);
334  a->push(a->zbx);
335  a->push(a->zsi);
336  a->sub(a->zsp, 8); // Align rsp to 16 bytes
337  a->mov(a->zbx, a->zcx);
338 
339  // HANDLE heap = GetProcessHeap();
340  if (x64) {
341  a->mov(a->zax, pGetProcessHeap);
342  a->sub(a->zsp, 32);
343  a->call(a->zax);
344  a->add(a->zsp, 32);
345  } else {
346  a->mov(a->zax, pGetProcessHeap);
347  a->call(a->zax);
348  }
349  a->mov(a->zsi, a->zax);
350 
351  // HeapFree(heap, 0, v->data);
352  a.GenCall(pHeapFree, {a->zsi, 0, ptr(a->zbx, offsetof(Vector, data), a->zbx.getSize())}, cc_stdcall);
353 
354  // HeapFree(heap, 0, v);
355  a.GenCall(pHeapFree, {a->zsi, 0, a->zbx}, cc_stdcall);
356 
357  a->add(a->zsp, 8);
358  a->pop(a->zsi);
359  a->pop(a->zbx);
360  a->ret();
361  }
362 
363 
364  // __fastcall void VectorAdd(VectorPtr v, ElemT elem);
365  {
366  a->bind(lVectorAdd);
367  a->push(a->zbx);
368  a->push(a->zsi);
369  a->sub(a->zsp, 8); // Align rsp to 16 bytes
370  a->mov(a->zbx, a->zcx);
371  a->mov(a->zsi, a->zdx);
372 
373  // VectorGrow(v, v->len+1);
374  a->mov(edx, ptr(a->zcx, offsetof(Vector, len)));
375  a->inc(a->zdx);
376  if (x64) {
377  a->sub(a->zsp, 32);
378  a->call(lVectorGrow);
379  a->add(a->zsp, 32);
380  } else {
381  a->call(lVectorGrow);
382  }
383 
384  // v->data[v->len] = data
385  a->mov(ecx, ptr(a->zbx, offsetof(Vector, len)));
386  a->mov(a->zax, ptr(a->zbx, offsetof(Vector, data)));
387  a->mov(ptr(a->zax, a->zcx, static_ilog2(sizeof(ElemT))), a->zsi);
388 
389  // v->len++
390  a->inc(ptr(a->zbx, offsetof(Vector, len)));
391 
392  a->add(a->zsp, 8);
393  a->pop(a->zsi);
394  a->pop(a->zbx);
395  a->ret();
396  }
397 
398 
399  // __fastcall void VectorClear(VectorPtr v);
400  {
401  a->bind(lVectorClear);
402  a->mov(dword_ptr(a->zcx, offsetof(Vector, len)), 0);
403  a->ret();
404  }
405 
406 
407  // __fastcall uint32_t VectorLength(VectorPtr v);
408  {
409  a->bind(lVectorLength);
410  a->mov(eax, ptr(a->zcx, offsetof(Vector, len)));
411  a->ret();
412  }
413 
414 
415  // __fastcall uint32_t VectorCapacity(VectorPtr v);
416  {
417  a->bind(lVectorCapacity);
418  a->mov(eax, ptr(a->zcx, offsetof(Vector, cap)));
419  a->ret();
420  }
421 
422 
423  // __fastcall DataPtr VectorData(VectorPtr v);
424  {
425  a->bind(lVectorData);
426  a->mov(a->zax, ptr(a->zcx, offsetof(Vector, data)));
427  a->ret();
428  }
429 
430 
431  ptr_t codeBaseAddr;
432 
433  if (process) {
434  remoteCode = std::move(mem->Allocate(a->getCodeSize()).result());
435 
436  code = malloc(a->getCodeSize());
437  a->relocCode(code);
438 
439  mem->Write(remoteCode.ptr(), a->getCodeSize(), code);
440 
441  free(code);
442  code = nullptr;
443 
444  codeBaseAddr = remoteCode.ptr();
445  } else {
446  code = VirtualAlloc(NULL, a->getCodeSize(), MEM_COMMIT | MEM_RESERVE, PAGE_EXECUTE_READWRITE);
447  a->relocCode(code);
448  codeBaseAddr = (ptr_t) code;
449  }
450 
451  api.vectorNew = codeBaseAddr + a->getLabelOffset(lVectorNew);
452  api.vectorFree = codeBaseAddr + a->getLabelOffset(lVectorFree);
453  api.vectorAdd = codeBaseAddr + a->getLabelOffset(lVectorAdd);
454  api.vectorClear = codeBaseAddr + a->getLabelOffset(lVectorClear);
455  api.vectorLength = codeBaseAddr + a->getLabelOffset(lVectorLength);
456  api.vectorCapacity = codeBaseAddr + a->getLabelOffset(lVectorCapacity);
457  api.vectorData = codeBaseAddr + a->getLabelOffset(lVectorData);
458 
459  api.vectorGrow = codeBaseAddr + a->getLabelOffset(lVectorGrow);
460 
461  if (process) {
462  remAPI = new VectorRemoteAPI {
463  RemoteFunctionFastcall<VECTOR_NEW>(*process, api.vectorNew),
464  RemoteFunctionFastcall<VECTOR_FREE>(*process, api.vectorFree),
465  RemoteFunctionFastcall<VECTOR_ADD>(*process, api.vectorAdd),
466  RemoteFunctionFastcall<VECTOR_CLEAR>(*process, api.vectorClear),
467  RemoteFunctionFastcall<VECTOR_LENGTH>(*process, api.vectorLength),
468  RemoteFunctionFastcall<VECTOR_CAPACITY>(*process, api.vectorCapacity),
469  RemoteFunctionFastcall<VECTOR_DATA>(*process, api.vectorData),
470 
471  RemoteFunctionFastcall<VECTOR_GROW>(*process, api.vectorGrow)
472  };
473  } else {
474  localApi.vectorNew = (VECTOR_NEW) api.vectorNew;
475  localApi.vectorFree = (VECTOR_FREE) api.vectorFree;
476  localApi.vectorAdd = (VECTOR_ADD) api.vectorAdd;
477  localApi.vectorClear = (VECTOR_CLEAR) api.vectorClear;
478  localApi.vectorLength = (VECTOR_LENGTH) api.vectorLength;
479  localApi.vectorCapacity = (VECTOR_CAPACITY) api.vectorCapacity;
480  localApi.vectorData = (VECTOR_DATA) api.vectorData;
481 
482  localApi.vectorGrow = (VECTOR_GROW) api.vectorGrow;
483  }
484 
485  injected = true;
486 }
487 
488 
489 template <typename ElemT, typename IntPtrT>
490 void IPCVector<ElemT, IntPtrT>::uninject()
491 {
492  if (!injected) {
493  return;
494  }
495 
496  if (process) {
497  delete remAPI;
498 
499  remoteCode.Free();
500  remoteCode = MemBlock();
501 
502  process = nullptr;
503  } else {
504  if (code) {
505  VirtualFree(code, 0, MEM_RELEASE);
506  code = nullptr;
507  }
508  }
509 
510  injected = false;
511 }
512 
513 
514 }